bitkeeper revision 1.1548 (4294554btfa2GpomqV57KFpxEHsjEA)
author kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Wed, 25 May 2005 10:36:59 +0000 (10:36 +0000)
committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Wed, 25 May 2005 10:36:59 +0000 (10:36 +0000)
Move to Linux's cpumask_t and 'hotplug' multi-processor booting
interfaces. This also brings apic.c and various other files closer to
their Linux 2.6 equivalents. Simplified the scheduler interfaces a
little (particularly per-cpu and idle-domain initialisation).
Signed-off-by: Keir Fraser <keir@xensource.com>
41 files changed:
xen/arch/ia64/domain.c
xen/arch/ia64/xensetup.c
xen/arch/x86/acpi/boot.c
xen/arch/x86/apic.c
xen/arch/x86/cdb.c
xen/arch/x86/dom0_ops.c
xen/arch/x86/domain.c
xen/arch/x86/domain_build.c
xen/arch/x86/io_apic.c
xen/arch/x86/irq.c
xen/arch/x86/microcode.c
xen/arch/x86/mtrr/main.c
xen/arch/x86/nmi.c
xen/arch/x86/setup.c
xen/arch/x86/shadow.c
xen/arch/x86/smp.c
xen/arch/x86/smpboot.c
xen/arch/x86/time.c
xen/arch/x86/traps.c
xen/arch/x86/vmx.c
xen/common/ac_timer.c
xen/common/dom0_ops.c
xen/common/domain.c
xen/common/page_alloc.c
xen/common/perfc.c
xen/common/sched_bvt.c
xen/common/sched_sedf.c
xen/common/schedule.c
xen/common/trace.c
xen/include/asm-x86/asm_defns.h
xen/include/asm-x86/bitops.h
xen/include/asm-x86/div64.h
xen/include/asm-x86/flushtlb.h
xen/include/asm-x86/irq.h
xen/include/asm-x86/processor.h
xen/include/xen/bitmap.h
xen/include/xen/cpumask.h
xen/include/xen/kernel.h
xen/include/xen/sched-if.h
xen/include/xen/sched.h
xen/include/xen/smp.h

index 8f12179c5dfba361d5096fc733513550f2044850..2dff8d5fd261758ae9f49f23f5fc83ad461471bc 100644 (file)
@@ -124,7 +124,6 @@ void startup_cpu_idle_loop(void)
 {
        /* Just some sanity to ensure that the scheduler is set up okay. */
        ASSERT(current->domain == IDLE_DOMAIN_ID);
-       domain_unpause_by_systemcontroller(current->domain);
        raise_softirq(SCHEDULE_SOFTIRQ);
        do_softirq();
 
index 605ac157cabe70e2d4b26bc1e5d6766e7bb26abb..ba6cd64f9482bfdb91751bcbea3e0640f6923654 100644 (file)
@@ -249,13 +249,11 @@ printk("About to call sort_main_extable()\n");
     /* Create initial domain 0. */
 printk("About to call do_createdomain()\n");
     dom0 = do_createdomain(0, 0);
-printk("About to call init_idle_task()\n");
     init_task.domain = &idle0_domain;
     init_task.processor = 0;
 //    init_task.mm = &init_mm;
     init_task.domain->arch.mm = &init_mm;
 //    init_task.thread = INIT_THREAD;
-    init_idle_task();
     //arch_do_createdomain(current);
 #ifdef CLONE_DOMAIN0
     {
@@ -314,7 +312,6 @@ printk("About to call init_trace_bufs()\n");
     console_endboot(cmdline && strstr(cmdline, "tty0"));
 #endif
 
-    domain_unpause_by_systemcontroller(current->domain);
 #ifdef CLONE_DOMAIN0
     {
     int i;
index 79c35b871986a1f1e326102f2af2b3302986f700..19f61476488c00062e71eb8657c669d75828643b 100644 (file)
@@ -34,7 +34,6 @@
 #include <asm/io_apic.h>
 #include <asm/apic.h>
 #include <asm/io.h>
-#include <asm/irq.h>
 #include <asm/mpspec.h>
 #include <mach_apic.h>
 #include <mach_mpparse.h>
index cb4bd1fd73a54e367adadb09f710e3aec4b17596..86bdb6253c8e815a2176724245265415b496afff 100644 (file)
@@ -663,7 +663,7 @@ void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
 
 #define APIC_DIVISOR 1
 
-static void __setup_APIC_LVTT(unsigned int clocks)
+void __setup_APIC_LVTT(unsigned int clocks)
 {
     unsigned int lvtt_value, tmp_value, ver;
 
@@ -680,30 +680,33 @@ static void __setup_APIC_LVTT(unsigned int clocks)
     apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
 }
 
-/*
- * this is done for every CPU from setup_APIC_clocks() below.
- * We setup each local APIC with a zero timeout value for now.
- * Unlike Linux, we don't have to wait for slices etc.
- */
-void setup_APIC_timer(void * data)
+static void __init setup_APIC_timer(unsigned int clocks)
 {
     unsigned long flags;
-    __save_flags(flags);
-    __sti();
-    __setup_APIC_LVTT(0);
-    __restore_flags(flags);
+    
+    local_irq_save(flags);
+
+    /*
+     * Wait for IRQ0's slice:
+     */
+    wait_timer_tick();
+
+    __setup_APIC_LVTT(clocks);
+
+    local_irq_restore(flags);
 }
 
 /*
- * In this function we calibrate APIC bus clocks to the external timer.
- *
- * As a result we have the Bus Speed and CPU speed in Hz.
- * 
- * We want to do the calibration only once (for CPU0).  CPUs connected by the
- * same APIC bus have the very same bus frequency.
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
  *
- * This bit is a bit shoddy since we use the very same periodic timer interrupt
- * we try to eliminate to calibrate the APIC. 
+ * We want to do the calibration only once since we
+ * want to have local timer irqs syncron. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
  */
 
 int __init calibrate_APIC_clock(void)
@@ -780,21 +783,48 @@ int __init calibrate_APIC_clock(void)
     return result;
 }
 
-/*
- * initialise the APIC timers for all CPUs
- * we start with the first and find out processor frequency and bus speed
- */
-void __init setup_APIC_clocks (void)
+
+static unsigned int calibration_result;
+
+void __init setup_boot_APIC_clock(void)
 {
+    apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
     using_apic_timer = 1;
-    __cli();
-    /* calibrate CPU0 for CPU speed and BUS speed */
-    bus_freq = calibrate_APIC_clock();
-    /* Now set up the timer for real. */
-    setup_APIC_timer((void *)bus_freq);
-    __sti();
-    /* and update all other cpus */
-    smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
+
+    local_irq_disable();
+    
+    calibration_result = calibrate_APIC_clock();
+    /*
+     * Now set up the timer for real.
+     */
+    setup_APIC_timer(calibration_result);
+    
+    local_irq_enable();
+}
+
+void __init setup_secondary_APIC_clock(void)
+{
+    setup_APIC_timer(calibration_result);
+}
+
+void __init disable_APIC_timer(void)
+{
+    if (using_apic_timer) {
+        unsigned long v;
+        
+        v = apic_read(APIC_LVTT);
+        apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+    }
+}
+
+void enable_APIC_timer(void)
+{
+    if (using_apic_timer) {
+        unsigned long v;
+        
+        v = apic_read(APIC_LVTT);
+        apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
+    }
 }
 
 #undef APIC_DIVISOR
@@ -885,7 +915,7 @@ asmlinkage void smp_spurious_interrupt(struct cpu_user_regs *regs)
         ack_APIC_irq();
 
     /* see sw-dev-man vol 3, chapter 7.4.13.5 */
-    printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
+    printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
            smp_processor_id());
 }
 
@@ -914,8 +944,8 @@ asmlinkage void smp_error_interrupt(struct cpu_user_regs *regs)
        6: Received illegal vector
        7: Illegal register address
     */
-    printk("APIC error on CPU%d: %02lx(%02lx)\n",
-            smp_processor_id(), v, v1);
+    printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+            smp_processor_id(), v , v1);
 }
 
 /*
@@ -940,20 +970,18 @@ int __init APIC_init_uniprocessor (void)
 
     connect_bsp_APIC();
 
-#ifdef CONFIG_SMP
-    cpu_online_map = 1;
-#endif
     phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
-    apic_write_around(APIC_ID, boot_cpu_physical_apicid);
 
     setup_local_APIC();
 
+    if (nmi_watchdog == NMI_LOCAL_APIC)
+        check_nmi_watchdog();
 #ifdef CONFIG_X86_IO_APIC
     if (smp_found_config)
         if (!skip_ioapic_setup && nr_ioapics)
             setup_IO_APIC();
 #endif
-    setup_APIC_clocks();
+    setup_boot_APIC_clock();
 
     return 0;
 }
index 899493380fad051ae30d13a9f9899f61f85cf7d9..f92e78f9c67de809dea8d33c620cb8d02e0d2a63 100644 (file)
@@ -9,7 +9,7 @@
 #include <xen/lib.h>
 #include <asm/uaccess.h>
 #include <xen/serial.h>
-#include <asm/irq.h>
+#include <xen/irq.h>
 #include <xen/spinlock.h>
 #include <asm/debugger.h>
 #include <xen/init.h>
index 85fbe494f19b3205d9bfcfbf094b86c5beca9b04..423291197863a673bf3b5578374788075d22b993 100644 (file)
@@ -176,8 +176,8 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
     {
         dom0_physinfo_t *pi = &op->u.physinfo;
 
-        pi->ht_per_core = opt_noht ? 1 : ht_per_core;
-        pi->cores       = smp_num_cpus / pi->ht_per_core;
+        pi->ht_per_core = ht_per_core;
+        pi->cores       = num_online_cpus() / ht_per_core;
         pi->total_pages = max_page;
         pi->free_pages  = avail_domheap_pages();
         pi->cpu_khz     = cpu_khz;
index e046e9017d1ab7341d4506e825929782a378fb59..30795b5831b0ac964ecc344ad88c71b01eabc157 100644 (file)
@@ -73,44 +73,31 @@ static void default_idle(void)
 void idle_loop(void)
 {
     int cpu = smp_processor_id();
+
     for ( ; ; )
     {
         irq_stat[cpu].idle_timestamp = jiffies;
+
         while ( !softirq_pending(cpu) )
         {
             page_scrub_schedule_work();
             default_idle();
         }
+
         do_softirq();
     }
 }
 
-static void __startup_cpu_idle_loop(struct exec_domain *ed)
-{
-    /* Signal to boot CPU that we are done. */
-    init_idle();
-
-    /* Start normal idle loop. */
-    ed->arch.schedule_tail = continue_idle_task;
-    continue_idle_task(ed);
-}
-
 void startup_cpu_idle_loop(void)
 {
     struct exec_domain *ed = current;
 
-    /* Just some sanity to ensure that the scheduler is set up okay. */
-    ASSERT(ed->domain->domain_id == IDLE_DOMAIN_ID);
+    ASSERT(is_idle_task(ed->domain));
     percpu_ctxt[smp_processor_id()].curr_ed = ed;
     set_bit(smp_processor_id(), &ed->domain->cpuset);
-    domain_unpause_by_systemcontroller(ed->domain);
-
-    ed->arch.schedule_tail = __startup_cpu_idle_loop;
-    raise_softirq(SCHEDULE_SOFTIRQ);
-    do_softirq();
+    ed->arch.schedule_tail = continue_idle_task;
 
-    /* End up in __startup_cpu_idle_loop, not here. */
-    BUG();
+    idle_loop();
 }
 
 static long no_idt[2];
@@ -244,7 +231,7 @@ void arch_do_createdomain(struct exec_domain *ed)
 
     ed->arch.flags = TF_kernel_mode;
 
-    if ( d->domain_id == IDLE_DOMAIN_ID )
+    if ( is_idle_task(d) )
         return;
 
     ed->arch.schedule_tail = continue_nonidle_task;
index a8c66d628171f05f847db8c2b046b42acdf42ae1..ebdbb622c88a1f3c74a4c9b26dfe9aacd8ecc3c3 100644 (file)
@@ -438,7 +438,7 @@ int construct_dom0(struct domain *d,
     /* Mask all upcalls... */
     for ( i = 0; i < MAX_VIRT_CPUS; i++ )
         d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
-    d->shared_info->n_vcpu = smp_num_cpus;
+    d->shared_info->n_vcpu = num_online_cpus();
 
     /* Set up monitor table */
     update_pagetables(ed);
index 286313f6bb7267f8e8a23a73d0c98f72daf96e17..71a826045379632293d408ae6c76830a411d4128 100644 (file)
@@ -2259,7 +2259,7 @@ int ioapic_guest_write(int apicid, int address, u32 val)
     
     pin = (address - 0x10) >> 1;
 
-    rte.dest.logical.logical_dest = target_cpus();
+    rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
     *(int *)&rte = val;
 
     if ( rte.vector >= FIRST_DEVICE_VECTOR )
index aaaff647cebaa201e2a006b81b2784f207549c66..88807d2b3d577a569cb37b3281e5c94aba999321 100644 (file)
@@ -237,6 +237,7 @@ int pirq_guest_bind(struct exec_domain *ed, int irq, int will_share)
     irq_guest_action_t *action;
     unsigned long       flags;
     int                 rc = 0;
+    cpumask_t           cpumask = CPU_MASK_NONE;
 
     if ( !IS_CAPABLE_PHYSDEV(d) )
         return -EPERM;
@@ -273,9 +274,9 @@ int pirq_guest_bind(struct exec_domain *ed, int irq, int will_share)
         desc->handler->startup(irq);
 
         /* Attempt to bind the interrupt target to the correct CPU. */
+        cpu_set(ed->processor, cpumask);
         if ( desc->handler->set_affinity != NULL )
-            desc->handler->set_affinity(
-                irq, apicid_to_phys_cpu_present(ed->processor));
+            desc->handler->set_affinity(irq, cpumask);
     }
     else if ( !will_share || !action->shareable )
     {
index 4cbafae1e63960ffcb242d1ec8fcc3b70b9401e7..fcf4f94125aa44259df5794b2cd74c916c2bd8a3 100644 (file)
@@ -86,7 +86,6 @@
 #define up(_m) spin_unlock(_m)
 #define vmalloc(_s) xmalloc_bytes(_s)
 #define vfree(_p) xfree(_p)
-#define num_online_cpus() smp_num_cpus
 
 #if 0
 MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
index b6122d9d0216896254a7f977b2cd9d7a6dc4fe4f..50c2f428b47000b5599821a2fb3f43483b9fc88d 100644 (file)
@@ -49,8 +49,6 @@
 #define down(_m) spin_lock(_m)
 #define up(_m) spin_unlock(_m)
 
-#define num_booting_cpus() smp_num_cpus
-
 u32 num_var_ranges = 0;
 
 unsigned int *usage_table;
index aef14645e46930e6a14d39ecadc3fdabda6fbd08..94ec450d1b4272a53c9ff2c9e5db73ddce60d391 100644 (file)
@@ -92,13 +92,16 @@ int __init check_nmi_watchdog (void)
 
     printk("Testing NMI watchdog --- ");
 
-    for ( cpu = 0; cpu < smp_num_cpus; cpu++ ) 
+    for ( cpu = 0; cpu < NR_CPUS; cpu++ ) 
         prev_nmi_count[cpu] = nmi_count(cpu);
-    __sti();
+    local_irq_enable();
     mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
 
-    for ( cpu = 0; cpu < smp_num_cpus; cpu++ ) 
+    for ( cpu = 0; cpu < NR_CPUS; cpu++ ) 
     {
+        if ( !cpu_isset(cpu, cpu_callin_map) && 
+             !cpu_isset(cpu, cpu_online_map) )
+            continue;
         if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
             printk("CPU#%d stuck. ", cpu);
         else
@@ -277,13 +280,6 @@ void watchdog_enable(void)
     spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
-void touch_nmi_watchdog (void)
-{
-    int i;
-    for (i = 0; i < smp_num_cpus; i++)
-        alert_counter[i] = 0;
-}
-
 void nmi_watchdog_tick (struct cpu_user_regs * regs)
 {
     int sum, cpu = smp_processor_id();
index ee5c915d06c38e030bd50bbb3561c712ab07ff97..7fcadad2d7c95d5746a248fac630251188974e3a 100644 (file)
@@ -33,6 +33,14 @@ integer_param("xenheap_megabytes", opt_xenheap_megabytes);
 int opt_noht = 0;
 boolean_param("noht", opt_noht);
 
+/* opt_nosmp: If true, secondary processors are ignored. */
+static int opt_nosmp = 0;
+boolean_param("nosmp", opt_nosmp);
+
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus); 
+
 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
 static int opt_watchdog = 0;
 boolean_param("watchdog", opt_watchdog);
@@ -58,6 +66,9 @@ boolean_param("noapic", skip_ioapic_setup);
 
 int early_boot = 1;
 
+int ht_per_core = 1;
+cpumask_t cpu_present_map;
+
 /* Limits of Xen heap, used to initialise the allocator. */
 unsigned long xenheap_phys_start, xenheap_phys_end;
 
@@ -67,7 +78,6 @@ extern void trap_init(void);
 extern void time_init(void);
 extern void ac_timer_init(void);
 extern void initialize_keytable();
-extern int do_timer_lists_from_pit;
 
 extern unsigned long cpu0_stack[];
 
@@ -80,13 +90,10 @@ unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
 #endif
 EXPORT_SYMBOL(mmu_cr4_features);
 
-unsigned long wait_init_idle;
-
 struct exec_domain *idle_task[NR_CPUS] = { &idle0_exec_domain };
 
 int acpi_disabled;
 
-int phys_proc_id[NR_CPUS];
 int logical_proc_id[NR_CPUS];
 
 /* Standard macro to see if a specific flag is changeable. */
@@ -147,12 +154,11 @@ static void __init init_intel(struct cpuinfo_x86 *c)
     if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
         clear_bit(X86_FEATURE_SEP, &c->x86_capability);
 
-#ifdef CONFIG_SMP
     if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
     {
         u32     eax, ebx, ecx, edx;
         int     initial_apic_id, siblings, cpu = smp_processor_id();
-        
+
         cpuid(1, &eax, &ebx, &ecx, &edx);
         ht_per_core = siblings = (ebx & 0xff0000) >> 16;
 
@@ -176,7 +182,6 @@ static void __init init_intel(struct cpuinfo_x86 *c)
                    cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
         }
     }
-#endif
 
 #ifdef CONFIG_VMX
     start_vmx();
@@ -292,6 +297,10 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
     }
 }
 
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+    printk("booted.\n");
+}
 
 unsigned long cpu_initialized;
 void __init cpu_init(void)
@@ -335,8 +344,6 @@ void __init cpu_init(void)
 
     /* Install correct page table. */
     write_ptbase(current);
-
-    init_idle_task();
 }
 
 int acpi_force;
@@ -383,6 +390,8 @@ static void __init do_initcalls(void)
 
 static void __init start_of_day(void)
 {
+    int i;
+
     /* Unmap the first page of CPU0's stack. */
     memguard_guard_stack(cpu0_stack);
 
@@ -421,8 +430,6 @@ static void __init start_of_day(void)
 
     init_apic_mappings();
 
-    scheduler_init();  
-
     init_IRQ();
 
     trap_init();
@@ -431,41 +438,41 @@ static void __init start_of_day(void)
 
     arch_init_memory();
 
-    smp_boot_cpus();
+    scheduler_init();  
+
+    if ( opt_nosmp )
+        max_cpus = 0;
+    smp_prepare_cpus(max_cpus);
 
-    __sti();
+    /* We aren't hotplug-capable yet. */
+    BUG_ON(!cpus_empty(cpu_present_map));
+    for_each_cpu ( i )
+        cpu_set(i, cpu_present_map);
 
     initialize_keytable();
 
     serial_init_stage2();
 
-    if ( !cpu_has_apic )
+    ac_timer_init();
+
+    init_xen_time();
+
+    for_each_present_cpu ( i )
     {
-        do_timer_lists_from_pit = 1;
-        if ( smp_num_cpus != 1 )
-            panic("We need local APICs on SMP machines!");
+        if ( num_online_cpus() >= max_cpus )
+            break;
+        if ( !cpu_online(i) )
+            __cpu_up(i);
     }
 
-    ac_timer_init();    /* init accurate timers */
-    init_xen_time();   /* initialise the time */
-    schedulers_start(); /* start scheduler for each CPU */
-
-    check_nmi_watchdog();
+    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+    smp_cpus_done(max_cpus);
 
     do_initcalls();
 
-    wait_init_idle = cpu_online_map;
-    clear_bit(smp_processor_id(), &wait_init_idle);
-    smp_threads_ready = 1;
-    smp_commence(); /* Tell other CPUs that state of the world is stable. */
-    while ( wait_init_idle != 0 )
-        cpu_relax();
+    schedulers_start();
 
     watchdog_enable();
-
-#ifdef CONFIG_X86_64 /* x86_32 uses low mappings when building DOM0. */
-    zap_low_mappings();
-#endif
 }
 
 #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
@@ -487,6 +494,8 @@ void __init __start_xen(multiboot_info_t *mbi)
     set_current(&idle0_exec_domain);
     set_processor_id(0);
 
+    smp_prepare_boot_cpu();
+
     /* We initialise the serial devices very early so we can get debugging. */
     serial_init_stage1();
 
@@ -695,8 +704,8 @@ void __init __start_xen(multiboot_info_t *mbi)
     /* Hide UART from DOM0 if we're using it */
     serial_endboot();
 
-    domain_unpause_by_systemcontroller(current->domain);
     domain_unpause_by_systemcontroller(dom0);
+
     startup_cpu_idle_loop();
 }
 
index 86ae84f116a528dae8d3c94e21d02a654a4bb3bb..83d7fc11b2e761870ffe511ab95a95412ddaa285 100644 (file)
@@ -2525,7 +2525,7 @@ void __shadow_sync_all(struct domain *d)
     // page table page needs to be vcpu private).
     //
 #if 0 // this should be enabled for SMP guests...
-    flush_tlb_mask(((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id()));
+    flush_tlb_mask(((1<<num_online_cpus()) - 1) & ~(1<<smp_processor_id()));
 #endif
     need_flush = 1;
 
index 32641165a54f5ab8cdce0ea6c5adb338dd123df6..71b565e0979db8b356a06373bb789ea886f2049a 100644 (file)
@@ -141,7 +141,7 @@ static inline void send_IPI_allbutself(int vector)
      * If there are no other CPUs in the system then we get an APIC send error 
      * if we try to broadcast. thus we have to avoid sending IPIs in this case.
      */
-    if ( smp_num_cpus <= 1 )
+    if ( num_online_cpus() <= 1 )
         return;
 
     __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
@@ -192,10 +192,10 @@ void new_tlbflush_clock_period(void)
     ASSERT(local_irq_is_enabled());
     
     /* Flush everyone else. We definitely flushed just before entry. */
-    if ( smp_num_cpus > 1 )
+    if ( num_online_cpus() > 1 )
     {
         spin_lock(&flush_lock);
-        flush_cpumask  = (1UL << smp_num_cpus) - 1;
+        flush_cpumask  = (1UL << num_online_cpus()) - 1;
         flush_cpumask &= ~(1UL << smp_processor_id());
         flush_va       = FLUSHVA_ALL;
         send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
@@ -257,7 +257,7 @@ int smp_call_function(
 
     ASSERT(local_irq_is_enabled());
 
-    cpuset = ((1UL << smp_num_cpus) - 1) & ~(1UL << smp_processor_id());
+    cpuset = ((1UL << num_online_cpus()) - 1) & ~(1UL << smp_processor_id());
     if ( cpuset == 0 )
         return 0;
 
@@ -295,7 +295,6 @@ void smp_send_stop(void)
 {
     /* Stop all other CPUs in the system. */
     smp_call_function(stop_this_cpu, NULL, 1, 0);
-    smp_num_cpus = 1;
 
     local_irq_disable();
     disable_local_APIC();
index 4dcdf025c0439f4acab2d7eb4b844a4c9261fde0..5b43462e50bf7883c3a1d4537bb7e8b44152bb8b 100644 (file)
@@ -17,7 +17,7 @@
  *     Fixes
  *             Felix Koop      :       NR_CPUS used properly
  *             Jose Renau      :       Handle single CPU case.
- *             Alan Cox        :       By repeated request 8) - Total BogoMIP report.
+ *             Alan Cox        :       By repeated request 8) - Total BogoMIPS report.
  *             Greg Wright     :       Fix for kernel stacks panic.
  *             Erich Boleyn    :       MP v1.4 and additional changes.
  *     Matthias Sattler        :       Changes for 2.1 kernel map.
  *             Tigran Aivazian :       fixed "0.00 in /proc/uptime on SMP" bug.
  *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs
  *             Martin J. Bligh :       Added support for multi-quad systems
- */
+ *             Dave Jones      :       Report invalid combinations of Athlon CPUs.
+*              Rusty Russell   :       Hacked into shape for new "hotplug" boot process. */
 
 #include <xen/config.h>
 #include <xen/init.h>
-#include <xen/irq.h>
+#include <xen/kernel.h>
 #include <xen/mm.h>
-#include <xen/slab.h>
-#include <asm/flushtlb.h>
-#include <asm/mc146818rtc.h>
-#include <asm/smpboot.h>
-#include <xen/smp.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <asm/mpspec.h>
-#include <asm/io_apic.h>
 #include <xen/sched.h>
+#include <xen/irq.h>
 #include <xen/delay.h>
-#include <xen/lib.h>
+#include <asm/mc146818rtc.h>
+#include <asm/desc.h>
+#include <asm/div64.h>
+#include <asm/msr.h>
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
+#include <smpboot_hooks.h>
 
-/* opt_nosmp: If true, secondary processors are ignored. */
-static int opt_nosmp = 0;
-boolean_param("nosmp", opt_nosmp);
-
-/* maxcpus: maximum number of CPUs to activate. */
-static int max_cpus = -1;
-integer_param("maxcpus", max_cpus); 
+static int _foo;
+#define set_kernel_exec(x,y) (_foo=0)
+#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
+int tainted;
+#define TAINT_UNSAFE_SMP 0
 
-/* Total count of live CPUs */
-int smp_num_cpus = 1;
+/* Set if we find a B stepping CPU */
+static int __initdata smp_b_stepping;
 
-/* Number of hyperthreads per core */
-int ht_per_core = 1;
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+EXPORT_SYMBOL(phys_proc_id);
 
-/* Bitmask of currently online CPUs */
+/* bitmap of online cpus */
 cpumask_t cpu_online_map;
 
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
+static cpumask_t smp_commenced_mask;
 
 /* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS];
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 
-/* Set when the idlers are all forked */
-int smp_threads_ready;
+u8 x86_cpu_to_apicid[NR_CPUS] =
+                       { [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
 
 /*
  * Trampoline 80x86 program as an array.
@@ -84,6 +83,7 @@ int smp_threads_ready;
 extern unsigned char trampoline_data [];
 extern unsigned char trampoline_end  [];
 static unsigned char *trampoline_base;
+static int trampoline_exec;
 
 /*
  * Currently trivial. Write the real->protected mode
@@ -93,8 +93,8 @@ static unsigned char *trampoline_base;
 
 static unsigned long __init setup_trampoline(void)
 {
-    memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
-    return virt_to_phys(trampoline_base);
+       memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+       return virt_to_phys(trampoline_base);
 }
 
 /*
@@ -103,11 +103,17 @@ static unsigned long __init setup_trampoline(void)
  */
 void __init smp_alloc_memory(void)
 {
-    /*
-     * Has to be in very low memory so we can execute
-     * real-mode AP code.
-     */
-    trampoline_base = __va(0x90000);
+       trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+       /*
+        * Has to be in very low memory so we can execute
+        * real-mode AP code.
+        */
+       if (__pa(trampoline_base) >= 0x9F000)
+               BUG();
+       /*
+        * Make the SMP trampoline executable:
+        */
+       trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
 }
 
 /*
@@ -115,40 +121,63 @@ void __init smp_alloc_memory(void)
  * a given CPU
  */
 
-void __init smp_store_cpu_info(int id)
+static void __init smp_store_cpu_info(int id)
 {
-    cpu_data[id] = boot_cpu_data;
-    if (id != 0)
-        identify_cpu(&cpu_data[id]);
-}
-
-/*
- * Architecture specific routine called by the kernel just before init is
- * fired off. This allows the BP to have everything in order [we hope].
- * At the end of this all the APs will hit the system scheduling and off
- * we go. Each AP will load the system gdt's and jump through the kernel
- * init into idle(). At this point the scheduler will one day take over
- * and give them jobs to do. smp_callin is a standard routine
- * we use to track CPUs as they power up.
- */
-
-static atomic_t smp_commenced = ATOMIC_INIT(0);
-
-void __init smp_commence(void)
-{
-    /*
-     * Lets the callins below out of their loop.
-     */
-    Dprintk("Setting commenced=1, go go go\n");
-
-    wmb();
-    atomic_set(&smp_commenced,1);
+       struct cpuinfo_x86 *c = cpu_data + id;
+
+       *c = boot_cpu_data;
+       if (id!=0)
+               identify_cpu(c);
+       /*
+        * Mask B, Pentium, but not Pentium MMX
+        */
+       if (c->x86_vendor == X86_VENDOR_INTEL &&
+           c->x86 == 5 &&
+           c->x86_mask >= 1 && c->x86_mask <= 4 &&
+           c->x86_model <= 3)
+               /*
+                * Remember we have B step Pentia with bugs
+                */
+               smp_b_stepping = 1;
+
+       /*
+        * Certain Athlons might work (for various values of 'work') in SMP
+        * but they are not certified as MP capable.
+        */
+       if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+
+               /* Athlon 660/661 is valid. */  
+               if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
+                       goto valid_k7;
+
+               /* Duron 670 is valid */
+               if ((c->x86_model==7) && (c->x86_mask==0))
+                       goto valid_k7;
+
+               /*
+                * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
+                * It's worth noting that the A5 stepping (662) of some Athlon XP's
+                * have the MP bit set.
+                * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
+                */
+               if (((c->x86_model==6) && (c->x86_mask>=2)) ||
+                   ((c->x86_model==7) && (c->x86_mask>=1)) ||
+                    (c->x86_model> 7))
+                       if (cpu_has_mp)
+                               goto valid_k7;
+
+               /* If we get here, it's not a certified SMP capable AMD system. */
+               tainted |= TAINT_UNSAFE_SMP;
+       }
+
+valid_k7:
+       ;
 }
 
 /*
  * TSC synchronization.
  *
- * We first check wether all CPUs have their TSC's synchronized,
+ * We first check whether all CPUs have their TSC's synchronized,
  * then we print a warning if not, and always resync.
  */
 
@@ -159,616 +188,724 @@ static unsigned long long tsc_values[NR_CPUS];
 
 #define NR_LOOPS 5
 
-/*
- * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
- * multiplication. Not terribly optimized but we need it at boot time only
- * anyway.
- *
- * result == a / b
- *     == (a1 + a2*(2^32)) / b
- *     == a1/b + a2*(2^32/b)
- *     == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
- *                 ^---- (this multiplication can overflow)
- */
-
-static unsigned long long div64 (unsigned long long a, unsigned long b0)
-{
-    unsigned int a1, a2;
-    unsigned long long res;
-
-    a1 = ((unsigned int*)&a)[0];
-    a2 = ((unsigned int*)&a)[1];
-
-    res = a1/b0 +
-        (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
-        a2 / b0 +
-        (a2 * (0xffffffff % b0)) / b0;
-
-    return res;
-}
-
 static void __init synchronize_tsc_bp (void)
 {
-    int i;
-    unsigned long long t0;
-    unsigned long long sum, avg;
-    long long delta;
-    int buggy = 0;
-
-    printk("checking TSC synchronization across CPUs: ");
-
-    atomic_set(&tsc_start_flag, 1);
-    wmb();
-
-    /*
-     * We loop a few times to get a primed instruction cache,
-     * then the last pass is more or less synchronized and
-     * the BP and APs set their cycle counters to zero all at
-     * once. This reduces the chance of having random offsets
-     * between the processors, and guarantees that the maximum
-     * delay between the cycle counters is never bigger than
-     * the latency of information-passing (cachelines) between
-     * two CPUs.
-     */
-    for (i = 0; i < NR_LOOPS; i++) {
-        /*
-         * all APs synchronize but they loop on '== num_cpus'
-         */
-        while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
-        atomic_set(&tsc_count_stop, 0);
-        wmb();
-        /*
-         * this lets the APs save their current TSC:
-         */
-        atomic_inc(&tsc_count_start);
-
-        rdtscll(tsc_values[smp_processor_id()]);
-        /*
-         * We clear the TSC in the last loop:
-         */
-        if (i == NR_LOOPS-1)
-            write_tsc(0, 0);
-
-        /*
-         * Wait for all APs to leave the synchronization point:
-         */
-        while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
-        atomic_set(&tsc_count_start, 0);
-        wmb();
-        atomic_inc(&tsc_count_stop);
-    }
-
-    sum = 0;
-    for (i = 0; i < smp_num_cpus; i++) {
-        t0 = tsc_values[i];
-        sum += t0;
-    }
-    avg = div64(sum, smp_num_cpus);
-
-    sum = 0;
-    for (i = 0; i < smp_num_cpus; i++) {
-        delta = tsc_values[i] - avg;
-        if (delta < 0)
-            delta = -delta;
-        /*
-         * We report bigger than 2 microseconds clock differences.
-         */
-        if (delta > 2*ticks_per_usec) {
-            long realdelta;
-            if (!buggy) {
-                buggy = 1;
-                printk("\n");
-            }
-            realdelta = div64(delta, ticks_per_usec);
-            if (tsc_values[i] < avg)
-                realdelta = -realdelta;
-
-            printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
-                   i, realdelta);
-        }
-
-        sum += delta;
-    }
-    if (!buggy)
-        printk("passed.\n");
+       int i;
+       unsigned long long t0;
+       unsigned long long sum, avg;
+       long long delta;
+       unsigned long one_usec;
+       int buggy = 0;
+
+       printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
+
+       /* convert from kcyc/sec to cyc/usec */
+       one_usec = cpu_khz / 1000;
+
+       atomic_set(&tsc_start_flag, 1);
+       wmb();
+
+       /*
+        * We loop a few times to get a primed instruction cache,
+        * then the last pass is more or less synchronized and
+        * the BP and APs set their cycle counters to zero all at
+        * once. This reduces the chance of having random offsets
+        * between the processors, and guarantees that the maximum
+        * delay between the cycle counters is never bigger than
+        * the latency of information-passing (cachelines) between
+        * two CPUs.
+        */
+       for (i = 0; i < NR_LOOPS; i++) {
+               /*
+                * all APs synchronize but they loop on '== num_cpus'
+                */
+               while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
+                       mb();
+               atomic_set(&tsc_count_stop, 0);
+               wmb();
+               /*
+                * this lets the APs save their current TSC:
+                */
+               atomic_inc(&tsc_count_start);
+
+               rdtscll(tsc_values[smp_processor_id()]);
+               /*
+                * We clear the TSC in the last loop:
+                */
+               if (i == NR_LOOPS-1)
+                       write_tsc(0, 0);
+
+               /*
+                * Wait for all APs to leave the synchronization point:
+                */
+               while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
+                       mb();
+               atomic_set(&tsc_count_start, 0);
+               wmb();
+               atomic_inc(&tsc_count_stop);
+       }
+
+       sum = 0;
+       for (i = 0; i < NR_CPUS; i++) {
+               if (cpu_isset(i, cpu_callout_map)) {
+                       t0 = tsc_values[i];
+                       sum += t0;
+               }
+       }
+       avg = sum;
+       do_div(avg, num_booting_cpus());
+
+       sum = 0;
+       for (i = 0; i < NR_CPUS; i++) {
+               if (!cpu_isset(i, cpu_callout_map))
+                       continue;
+               delta = tsc_values[i] - avg;
+               if (delta < 0)
+                       delta = -delta;
+               /*
+                * We report bigger than 2 microseconds clock differences.
+                */
+               if (delta > 2*one_usec) {
+                       long realdelta;
+                       if (!buggy) {
+                               buggy = 1;
+                               printk("\n");
+                       }
+                       realdelta = delta;
+                       do_div(realdelta, one_usec);
+                       if (tsc_values[i] < avg)
+                               realdelta = -realdelta;
+
+                       printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+               }
+
+               sum += delta;
+       }
+       if (!buggy)
+               printk("passed.\n");
 }
 
 static void __init synchronize_tsc_ap (void)
 {
-    int i;
-
-    /*
-     * smp_num_cpus is not necessarily known at the time
-     * this gets called, so we first wait for the BP to
-     * finish SMP initialization:
-     */
-    while (!atomic_read(&tsc_start_flag)) mb();
-
-    for (i = 0; i < NR_LOOPS; i++) {
-        atomic_inc(&tsc_count_start);
-        while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
-
-        rdtscll(tsc_values[smp_processor_id()]);
-        if (i == NR_LOOPS-1)
-            write_tsc(0, 0);
-
-        atomic_inc(&tsc_count_stop);
-        while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
-    }
+       int i;
+
+       /*
+        * Not every cpu is online at the time
+        * this gets called, so we first wait for the BP to
+        * finish SMP initialization:
+        */
+       while (!atomic_read(&tsc_start_flag)) mb();
+
+       for (i = 0; i < NR_LOOPS; i++) {
+               atomic_inc(&tsc_count_start);
+               while (atomic_read(&tsc_count_start) != num_booting_cpus())
+                       mb();
+
+               rdtscll(tsc_values[smp_processor_id()]);
+               if (i == NR_LOOPS-1)
+                       write_tsc(0, 0);
+
+               atomic_inc(&tsc_count_stop);
+               while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+       }
 }
 #undef NR_LOOPS
 
+extern void calibrate_delay(void);
+
 static atomic_t init_deasserted;
 
 void __init smp_callin(void)
 {
-    int cpuid, phys_id, i;
-
-    /*
-     * If waken up by an INIT in an 82489DX configuration
-     * we may get here before an INIT-deassert IPI reaches
-     * our local APIC.  We have to wait for the IPI or we'll
-     * lock up on an APIC access.
-     */
-    while (!atomic_read(&init_deasserted));
-
-    /*
-     * (This works even if the APIC is not enabled.)
-     */
-    phys_id = GET_APIC_ID(apic_read(APIC_ID));
-    cpuid = smp_processor_id();
-    if (test_and_set_bit(cpuid, &cpu_online_map)) {
-        printk("huh, phys CPU#%d, CPU#%d already present??\n",
-               phys_id, cpuid);
-        BUG();
-    }
-    Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
-    /*
-     * STARTUP IPIs are fragile beasts as they might sometimes
-     * trigger some glue motherboard logic. Complete APIC bus
-     * silence for 1 second, this overestimates the time the
-     * boot CPU is spending to send the up to 2 STARTUP IPIs
-     * by a factor of two. This should be enough.
-     */
-
-    for ( i = 0; i < 200; i++ )
-    {
-        if ( test_bit(cpuid, &cpu_callout_map) ) break;
-        mdelay(10);
-    }
-
-    if (!test_bit(cpuid, &cpu_callout_map)) {
-        printk("BUG: CPU%d started up but did not get a callout!\n",
-               cpuid);
-        BUG();
-    }
-
-    /*
-     * the boot CPU has finished the init stage and is spinning
-     * on callin_map until we finish. We are free to set up this
-     * CPU, first the APIC. (this is probably redundant on most
-     * boards)
-     */
-
-    Dprintk("CALLIN, before setup_local_APIC().\n");
-
-    setup_local_APIC();
-
-    __sti();
-
-    Dprintk("Stack at about %p\n",&cpuid);
-
-    /*
-     * Save our processor parameters
-     */
-    smp_store_cpu_info(cpuid);
-
-    /*
-     * Allow the master to continue.
-     */
-    set_bit(cpuid, &cpu_callin_map);
-
-    /*
-     *      Synchronize the TSC with the BP
-     */
-    synchronize_tsc_ap();
+       int cpuid, phys_id, i;
+
+       /*
+        * If waken up by an INIT in an 82489DX configuration
+        * we may get here before an INIT-deassert IPI reaches
+        * our local APIC.  We have to wait for the IPI or we'll
+        * lock up on an APIC access.
+        */
+       wait_for_init_deassert(&init_deasserted);
+
+       /*
+        * (This works even if the APIC is not enabled.)
+        */
+       phys_id = GET_APIC_ID(apic_read(APIC_ID));
+       cpuid = smp_processor_id();
+       if (cpu_isset(cpuid, cpu_callin_map)) {
+               printk("huh, phys CPU#%d, CPU#%d already present??\n",
+                                       phys_id, cpuid);
+               BUG();
+       }
+       Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+       /*
+        * STARTUP IPIs are fragile beasts as they might sometimes
+        * trigger some glue motherboard logic. Complete APIC bus
+        * silence for 1 second, this overestimates the time the
+        * boot CPU is spending to send the up to 2 STARTUP IPIs
+        * by a factor of two. This should be enough.
+        */
+
+       /*
+        * Waiting 2s total for startup
+        */
+       for (i = 0; i < 200; i++) {
+               /*
+                * Has the boot CPU finished its STARTUP sequence?
+                */
+               if (cpu_isset(cpuid, cpu_callout_map))
+                       break;
+               rep_nop();
+               mdelay(10);
+       }
+
+       if (!cpu_isset(cpuid, cpu_callout_map)) {
+               printk("BUG: CPU%d started up but did not get a callout!\n",
+                       cpuid);
+               BUG();
+       }
+
+       /*
+        * the boot CPU has finished the init stage and is spinning
+        * on callin_map until we finish. We are free to set up this
+        * CPU, first the APIC. (this is probably redundant on most
+        * boards)
+        */
+
+       Dprintk("CALLIN, before setup_local_APIC().\n");
+       smp_callin_clear_local_apic();
+       setup_local_APIC();
+       map_cpu_to_logical_apicid();
+
+#if 0
+       /*
+        * Get our bogomips.
+        */
+       calibrate_delay();
+       Dprintk("Stack at about %p\n",&cpuid);
+#endif
+
+       /*
+        * Save our processor parameters
+        */
+       smp_store_cpu_info(cpuid);
+
+       disable_APIC_timer();
+
+       /*
+        * Allow the master to continue.
+        */
+       cpu_set(cpuid, cpu_callin_map);
+
+       /*
+        *      Synchronize the TSC with the BP
+        */
+       if (cpu_has_tsc && cpu_khz)
+               synchronize_tsc_ap();
 }
 
-static int cpucount;
+int cpucount;
 
-#ifdef __i386__
+#ifdef CONFIG_X86_32
 static void construct_percpu_idt(unsigned int cpu)
 {
-    unsigned char idt_load[10];
+       unsigned char idt_load[10];
 
-    idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
-    memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
+       idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
+       memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
 
-    *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
-    *(unsigned long  *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
-    __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
+       *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
+       *(unsigned long  *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
+       __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
 }
 #endif
 
 /*
  * Activate a secondary processor.
  */
-void __init start_secondary(void)
+void __init start_secondary(void *unused)
 {
-    unsigned int cpu = cpucount;
-
-    extern void percpu_traps_init(void);
-    extern void cpu_init(void);
-
-    set_current(idle_task[cpu]);
-    set_processor_id(cpu);
+       unsigned int cpu = cpucount;
 
-    percpu_traps_init();
+       extern void percpu_traps_init(void);
+       extern void cpu_init(void);
 
-    cpu_init();
+       set_current(idle_task[cpu]);
+       set_processor_id(cpu);
 
-    smp_callin();
+       percpu_traps_init();
 
-    while (!atomic_read(&smp_commenced))
-        cpu_relax();
+       cpu_init();
+       smp_callin();
+       while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+               rep_nop();
 
-#ifdef __i386__
-    /*
-     * At this point, boot CPU has fully initialised the IDT. It is
-     * now safe to make ourselves a private copy.
-     */
-    construct_percpu_idt(cpu);
+#ifdef CONFIG_X86_32
+       /*
+        * At this point, boot CPU has fully initialised the IDT. It is
+        * now safe to make ourselves a private copy.
+        */
+       construct_percpu_idt(cpu);
 #endif
 
-    local_flush_tlb();
+       setup_secondary_APIC_clock();
+       enable_APIC_timer();
 
-    startup_cpu_idle_loop();
+       /*
+        * low-memory mappings have been cleared, flush them from
+        * the local TLBs too.
+        */
+       local_flush_tlb();
+       cpu_set(smp_processor_id(), cpu_online_map);
 
-    BUG();
+       /* We can take interrupts now: we're officially "up". */
+       local_irq_enable();
+
+       wmb();
+       startup_cpu_idle_loop();
 }
 
 extern struct {
-    unsigned long esp, ss;
+       void * esp;
+       unsigned short ss;
 } stack_start;
 
-/* which physical APIC ID maps to which logical CPU number */
-volatile int physical_apicid_2_cpu[MAX_APICID];
-/* which logical CPU number maps to which physical APIC ID */
-volatile int cpu_2_physical_apicid[NR_CPUS];
+#ifdef CONFIG_NUMA
 
-/* which logical APIC ID maps to which logical CPU number */
-volatile int logical_apicid_2_cpu[MAX_APICID];
-/* which logical CPU number maps to which logical APIC ID */
-volatile int cpu_2_logical_apicid[NR_CPUS];
+/* which logical CPUs are on which nodes */
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+                               { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+/* which node each logical CPU is on */
+int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_2_node);
 
-static inline void init_cpu_to_apicid(void)
-/* Initialize all maps between cpu number and apicids */
+/* set up a mapping between cpu and node. */
+static inline void map_cpu_to_node(int cpu, int node)
 {
-    int apicid, cpu;
-
-    for (apicid = 0; apicid < MAX_APICID; apicid++) {
-        physical_apicid_2_cpu[apicid] = -1;
-        logical_apicid_2_cpu[apicid] = -1;
-    }
-    for (cpu = 0; cpu < NR_CPUS; cpu++) {
-        cpu_2_physical_apicid[cpu] = -1;
-        cpu_2_logical_apicid[cpu] = -1;
-    }
+       printk("Mapping cpu %d to node %d\n", cpu, node);
+       cpu_set(cpu, node_2_cpu_mask[node]);
+       cpu_2_node[cpu] = node;
 }
 
-static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
-/* 
- * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
- * else physical apic ids
- */
+/* undo a mapping between cpu and node. */
+static inline void unmap_cpu_to_node(int cpu)
 {
-    physical_apicid_2_cpu[apicid] = cpu;       
-    cpu_2_physical_apicid[cpu] = apicid;
+       int node;
+
+       printk("Unmapping cpu %d from all nodes\n", cpu);
+       for (node = 0; node < MAX_NUMNODES; node ++)
+               cpu_clear(cpu, node_2_cpu_mask[node]);
+       cpu_2_node[cpu] = 0;
 }
+#else /* !CONFIG_NUMA */
 
-static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
-/* 
- * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
- * else physical apic ids
- */
+#define map_cpu_to_node(cpu, node)     ({})
+#define unmap_cpu_to_node(cpu) ({})
+
+#endif /* CONFIG_NUMA */
+
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+void map_cpu_to_logical_apicid(void)
+{
+       int cpu = smp_processor_id();
+       int apicid = logical_smp_processor_id();
+
+       cpu_2_logical_apicid[cpu] = apicid;
+       map_cpu_to_node(cpu, apicid_to_node(apicid));
+}
+
+void unmap_cpu_to_logical_apicid(int cpu)
 {
-    physical_apicid_2_cpu[apicid] = -1;        
-    cpu_2_physical_apicid[cpu] = -1;
+       cpu_2_logical_apicid[cpu] = BAD_APICID;
+       unmap_cpu_to_node(cpu);
 }
 
 #if APIC_DEBUG
-static inline void inquire_remote_apic(int apicid)
+static inline void __inquire_remote_apic(int apicid)
 {
-    int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
-    char *names[] = { "ID", "VERSION", "SPIV" };
-    int timeout, status;
-
-    printk("Inquiring remote APIC #%d...\n", apicid);
-
-    for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
-        printk("... APIC #%d %s: ", apicid, names[i]);
-
-        /*
-         * Wait for idle.
-         */
-        apic_wait_icr_idle();
-
-        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-        apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
-        timeout = 0;
-        do {
-            udelay(100);
-            status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
-        } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
-        switch (status) {
-        case APIC_ICR_RR_VALID:
-            status = apic_read(APIC_RRR);
-            printk("%08x\n", status);
-            break;
-        default:
-            printk("failed\n");
-        }
-    }
+       int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+       char *names[] = { "ID", "VERSION", "SPIV" };
+       int timeout, status;
+
+       printk("Inquiring remote APIC #%d...\n", apicid);
+
+       for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+               printk("... APIC #%d %s: ", apicid, names[i]);
+
+               /*
+                * Wait for idle.
+                */
+               apic_wait_icr_idle();
+
+               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+               apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+               timeout = 0;
+               do {
+                       udelay(100);
+                       status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+               } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+               switch (status) {
+               case APIC_ICR_RR_VALID:
+                       status = apic_read(APIC_RRR);
+                       printk("%08x\n", status);
+                       break;
+               default:
+                       printk("failed\n");
+               }
+       }
 }
 #endif
 
+#ifdef WAKE_SECONDARY_VIA_NMI
+/* 
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ */
+static int __init
+wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+{
+       unsigned long send_status = 0, accept_status = 0;
+       int timeout, maxlvt;
+
+       /* Target chip */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+
+       /* Boot on the stack */
+       /* Kick the second */
+       apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       /*
+        * Give the other CPU some time to accept the IPI.
+        */
+       udelay(200);
+       /*
+        * Due to the Pentium erratum 3AP.
+        */
+       maxlvt = get_maxlvt();
+       if (maxlvt > 3) {
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+       }
+       accept_status = (apic_read(APIC_ESR) & 0xEF);
+       Dprintk("NMI sent.\n");
+
+       if (send_status)
+               printk("APIC never delivered???\n");
+       if (accept_status)
+               printk("APIC delivery error (%lx).\n", accept_status);
+
+       return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_NMI */
 
-static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
+#ifdef WAKE_SECONDARY_VIA_INIT
+static int __init
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 {
-    unsigned long send_status = 0, accept_status = 0;
-    int maxlvt, timeout, num_starts, j;
-
-    Dprintk("Asserting INIT.\n");
-
-    /*
-     * Turn INIT on target chip
-     */
-    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-    /*
-     * Send IPI
-     */
-    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
-                      | APIC_DM_INIT);
-
-    Dprintk("Waiting for send to finish...\n");
-    timeout = 0;
-    do {
-        Dprintk("+");
-        udelay(100);
-        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-    } while (send_status && (timeout++ < 1000));
-
-    mdelay(10);
-
-    Dprintk("Deasserting INIT.\n");
-
-    /* Target chip */
-    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-    /* Send IPI */
-    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
-    Dprintk("Waiting for send to finish...\n");
-    timeout = 0;
-    do {
-        Dprintk("+");
-        udelay(100);
-        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-    } while (send_status && (timeout++ < 1000));
-
-    atomic_set(&init_deasserted, 1);
-
-    /*
-     * Should we send STARTUP IPIs ?
-     *
-     * Determine this based on the APIC version.
-     * If we don't have an integrated APIC, don't send the STARTUP IPIs.
-     */
-    if (APIC_INTEGRATED(apic_version[phys_apicid]))
-        num_starts = 2;
-    else
-        num_starts = 0;
-
-    /*
-     * Run STARTUP IPI loop.
-     */
-    Dprintk("#startup loops: %d.\n", num_starts);
-
-    maxlvt = get_maxlvt();
-
-    for (j = 1; j <= num_starts; j++) {
-        Dprintk("Sending STARTUP #%d.\n",j);
-
-        apic_read_around(APIC_SPIV);
-        apic_write(APIC_ESR, 0);
-        apic_read(APIC_ESR);
-        Dprintk("After apic_write.\n");
-
-        /*
-         * STARTUP IPI
-         */
-
-        /* Target chip */
-        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-        /* Boot on the stack */
-        /* Kick the second */
-        apic_write_around(APIC_ICR, APIC_DM_STARTUP
-                          | (start_eip >> 12));
-
-        /*
-         * Give the other CPU some time to accept the IPI.
-         */
-        udelay(300);
-
-        Dprintk("Startup point 1.\n");
-
-        Dprintk("Waiting for send to finish...\n");
-        timeout = 0;
-        do {
-            Dprintk("+");
-            udelay(100);
-            send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-        } while (send_status && (timeout++ < 1000));
-
-        /*
-         * Give the other CPU some time to accept the IPI.
-         */
-        udelay(200);
-        /*
-         * Due to the Pentium erratum 3AP.
-         */
-        if (maxlvt > 3) {
-            apic_read_around(APIC_SPIV);
-            apic_write(APIC_ESR, 0);
-        }
-        accept_status = (apic_read(APIC_ESR) & 0xEF);
-        if (send_status || accept_status)
-            break;
-    }
-    Dprintk("After Startup.\n");
-
-    if (send_status)
-        printk("APIC never delivered???\n");
-    if (accept_status)
-        printk("APIC delivery error (%lx).\n", accept_status);
-
-    return (send_status | accept_status);
+       unsigned long send_status = 0, accept_status = 0;
+       int maxlvt, timeout, num_starts, j;
+
+       /*
+        * Be paranoid about clearing APIC errors.
+        */
+       if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
+
+       Dprintk("Asserting INIT.\n");
+
+       /*
+        * Turn INIT on target chip
+        */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+       /*
+        * Send IPI
+        */
+       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+                               | APIC_DM_INIT);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       mdelay(10);
+
+       Dprintk("Deasserting INIT.\n");
+
+       /* Target chip */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+       /* Send IPI */
+       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       atomic_set(&init_deasserted, 1);
+
+       /*
+        * Should we send STARTUP IPIs ?
+        *
+        * Determine this based on the APIC version.
+        * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+        */
+       if (APIC_INTEGRATED(apic_version[phys_apicid]))
+               num_starts = 2;
+       else
+               num_starts = 0;
+
+       /*
+        * Run STARTUP IPI loop.
+        */
+       Dprintk("#startup loops: %d.\n", num_starts);
+
+       maxlvt = get_maxlvt();
+
+       for (j = 1; j <= num_starts; j++) {
+               Dprintk("Sending STARTUP #%d.\n",j);
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+               Dprintk("After apic_write.\n");
+
+               /*
+                * STARTUP IPI
+                */
+
+               /* Target chip */
+               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+               /* Boot on the stack */
+               /* Kick the second */
+               apic_write_around(APIC_ICR, APIC_DM_STARTUP
+                                       | (start_eip >> 12));
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(300);
+
+               Dprintk("Startup point 1.\n");
+
+               Dprintk("Waiting for send to finish...\n");
+               timeout = 0;
+               do {
+                       Dprintk("+");
+                       udelay(100);
+                       send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+               } while (send_status && (timeout++ < 1000));
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(200);
+               /*
+                * Due to the Pentium erratum 3AP.
+                */
+               if (maxlvt > 3) {
+                       apic_read_around(APIC_SPIV);
+                       apic_write(APIC_ESR, 0);
+               }
+               accept_status = (apic_read(APIC_ESR) & 0xEF);
+               if (send_status || accept_status)
+                       break;
+       }
+       Dprintk("After Startup.\n");
+
+       if (send_status)
+               printk("APIC never delivered???\n");
+       if (accept_status)
+               printk("APIC delivery error (%lx).\n", accept_status);
+
+       return (send_status | accept_status);
 }
+#endif /* WAKE_SECONDARY_VIA_INIT */
 
-extern unsigned long cpu_initialized;
+extern cpumask_t cpu_initialized;
 
-static void __init do_boot_cpu (int apicid) 
+static int __init do_boot_cpu(int apicid)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
  */
 {
-    struct domain *idle;
-    struct exec_domain *ed;
-    unsigned long boot_error = 0;
-    int timeout, cpu;
-    unsigned long start_eip;
-    void *stack;
-
-    cpu = ++cpucount;
+       struct domain *idle;
+       struct exec_domain *ed;
+       void *stack;
+       unsigned long boot_error;
+       int timeout, cpu;
+       unsigned long start_eip;
+       unsigned short nmi_high = 0, nmi_low = 0;
 
-    if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
-        panic("failed 'createdomain' for CPU %d", cpu);
+       cpu = ++cpucount;
 
-    ed = idle->exec_domain[0];
+       if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
+               panic("failed 'createdomain' for CPU %d", cpu);
 
-    set_bit(_DOMF_idle_domain, &idle->domain_flags);
+       ed = idle_task[cpu] = idle->exec_domain[0];
 
-    ed->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+       set_bit(_DOMF_idle_domain, &idle->domain_flags);
 
-    map_cpu_to_boot_apicid(cpu, apicid);
+       ed->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
 
-    idle_task[cpu] = ed;
+       /* start_eip had better be page-aligned! */
+       start_eip = setup_trampoline();
 
-    /* start_eip had better be page-aligned! */
-    start_eip = setup_trampoline();
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 
-    /* So we see what's up. */
-    printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
-
-    stack = (void *)alloc_xenheap_pages(STACK_ORDER);
+       stack = (void *)alloc_xenheap_pages(STACK_ORDER);
 #if defined(__i386__)
-    stack_start.esp = __pa(stack);
+       stack_start.esp = (void *)__pa(stack);
 #elif defined(__x86_64__)
-    stack_start.esp = (unsigned long)stack;
+       stack_start.esp = stack;
 #endif
-    stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
-
-    /* Debug build: detect stack overflow by setting up a guard page. */
-    memguard_guard_stack(stack);
-
-    /*
-     * This grunge runs the startup process for
-     * the targeted processor.
-     */
-
-    atomic_set(&init_deasserted, 0);
-
-    Dprintk("Setting warm reset code and vector.\n");
-
-    CMOS_WRITE(0xa, 0xf);
-    local_flush_tlb();
-    Dprintk("1.\n");
-    *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
-    Dprintk("2.\n");
-    *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
-    Dprintk("3.\n");
-
-    /*
-     * Be paranoid about clearing APIC errors.
-     */
-    if ( APIC_INTEGRATED(apic_version[apicid]) )
-    {
-        apic_read_around(APIC_SPIV);
-        apic_write(APIC_ESR, 0);
-        apic_read(APIC_ESR);
-    }
-
-    /*
-     * Status is now clean
-     */
-    boot_error = 0;
-
-    /*
-     * Starting actual IPI sequence...
-     */
-
-    boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
-
-    if (!boot_error) {
-        /*
-         * allow APs to start initializing.
-         */
-        Dprintk("Before Callout %d.\n", cpu);
-        set_bit(cpu, &cpu_callout_map);
-        Dprintk("After Callout %d.\n", cpu);
-
-        /*
-         * Wait 5s total for a response
-         */
-        for (timeout = 0; timeout < 50000; timeout++) {
-            if (test_bit(cpu, &cpu_callin_map))
-                break; /* It has booted */
-            udelay(100);
-        }
-
-        if (test_bit(cpu, &cpu_callin_map)) {
-            /* number CPUs logically, starting from 1 (BSP is 0) */
-            printk("CPU%d has booted.\n", cpu);
-        } else {
-            boot_error= 1;
-            if (*((volatile unsigned int *)phys_to_virt(start_eip))
-                == 0xA5A5A5A5)
+       stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
+
+       /* Debug build: detect stack overflow by setting up a guard page. */
+       memguard_guard_stack(stack);
+
+       /*
+        * This grunge runs the startup process for
+        * the targeted processor.
+        */
+
+       atomic_set(&init_deasserted, 0);
+
+       Dprintk("Setting warm reset code and vector.\n");
+
+       store_NMI_vector(&nmi_high, &nmi_low);
+
+       smpboot_setup_warm_reset_vector(start_eip);
+
+       /*
+        * Starting actual IPI sequence...
+        */
+       boot_error = wakeup_secondary_cpu(apicid, start_eip);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+                       if (*((volatile unsigned char *)trampoline_base)
+                                       == 0xA5)
                                /* trampoline started but...? */
-                printk("Stuck ??\n");
-            else
+                               printk("Stuck ??\n");
+                       else
                                /* trampoline code not run */
-                printk("Not responding.\n");
-#if APIC_DEBUG
-            inquire_remote_apic(apicid);
-#endif
-        }
-    }
-    if (boot_error) {
-        /* Try to put things back the way they were before ... */
-        unmap_cpu_to_boot_apicid(cpu, apicid);
-        clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
-        clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
-        clear_bit(cpu, &cpu_online_map);  /* was set in smp_callin() */
-        cpucount--;
-    }
+                               printk("Not responding.\n");
+                       inquire_remote_apic(apicid);
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+       if (boot_error) {
+               /* Try to put things back the way they were before ... */
+               unmap_cpu_to_logical_apicid(cpu);
+               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+               cpucount--;
+       }
+
+       /* mark "stuck" area as not stuck */
+       *((volatile unsigned long *)trampoline_base) = 0;
+
+       return boot_error;
 }
 
+#if 0
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+       unsigned long cachesize;       /* kB   */
+       unsigned long bandwidth = 350; /* MB/s */
+       /*
+        * Rough estimation for SMP scheduling, this is the number of
+        * cycles it takes for a fully memory-limited process to flush
+        * the SMP-local cache.
+        *
+        * (For a P5 this pretty much means we will choose another idle
+        *  CPU almost always at wakeup time (this is due to the small
+        *  L1 cache), on PIIs it's around 50-100 usecs, depending on
+        *  the cache size)
+        */
+
+       if (!cpu_khz) {
+               /*
+                * this basically disables processor-affinity
+                * scheduling on SMP without a TSC.
+                */
+               cacheflush_time = 0;
+               return;
+       } else {
+               cachesize = boot_cpu_data.x86_cache_size;
+               if (cachesize == -1) {
+                       cachesize = 16; /* Pentiums, 2x8kB cache */
+                       bandwidth = 100;
+               }
+
+               cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+       }
+
+       cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
+
+       printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+               (long)cacheflush_time/(cpu_khz/1000),
+               ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+       printk("task migration cache decay timeout: %ld msecs.\n",
+               cache_decay_ticks);
+}
+#else
+#define smp_tune_scheduling() ((void)0)
+#endif
 
 /*
  * Cycle through the processors sending APIC IPIs to boot each.
@@ -776,178 +913,274 @@ static void __init do_boot_cpu (int apicid)
 
 static int boot_cpu_logical_apicid;
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio = NULL;
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 
-void __init smp_boot_cpus(void)
+static void __init smp_boot_cpus(unsigned int max_cpus)
 {
-    int apicid, bit;
-
-    /* Initialize the logical to physical CPU number mapping */
-    init_cpu_to_apicid();
-
-    /*
-     * Setup boot CPU information
-     */
-    smp_store_cpu_info(0); /* Final full version of the data */
-    printk("CPU%d booted\n", 0);
-
-    /*
-     * We have the boot CPU online for sure.
-     */
-    set_bit(0, &cpu_online_map);
-    boot_cpu_logical_apicid = logical_smp_processor_id();
-    map_cpu_to_boot_apicid(0, boot_cpu_apicid);
-
-    /*
-     * If we couldnt find an SMP configuration at boot time,
-     * get out of here now!
-     */
-    if (!smp_found_config || opt_nosmp) {
-        io_apic_irqs = 0;
-        phys_cpu_present_map = physid_mask_of_physid(0);
-        cpu_online_map = 1;
-        smp_num_cpus = 1;
-        if (APIC_init_uniprocessor())
-            printk("Local APIC not detected."
-                   " Using dummy APIC emulation.\n");
-        goto smp_done;
-    }
-
-    /*
-     * Should not be necessary because the MP table should list the boot
-     * CPU too, but we do it for the sake of robustness anyway.
-     */
-    if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
-        printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
-               boot_cpu_physical_apicid);
-        physid_set(hard_smp_processor_id(), phys_cpu_present_map);
-    }
-
-    /*
-     * If we couldn't find a local APIC, then get out of here now!
-     */
-    if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
-        !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
-        printk("BIOS bug, local APIC #%d not detected!...\n",
-               boot_cpu_physical_apicid);
-        printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
-        io_apic_irqs = 0;
-        phys_cpu_present_map = physid_mask_of_physid(0);
-        cpu_online_map = 1;
-        smp_num_cpus = 1;
-        goto smp_done;
-    }
-
-    verify_local_APIC();
-
-    /*
-     * If SMP should be disabled, then really disable it!
-     */
-    if (!max_cpus) {
-        smp_found_config = 0;
-        printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
-        io_apic_irqs = 0;
-        phys_cpu_present_map = physid_mask_of_physid(0);
-        cpu_online_map = 1;
-        smp_num_cpus = 1;
-        goto smp_done;
-    }
-
-    connect_bsp_APIC();
-    setup_local_APIC();
-
-    if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
-        BUG();
-
-    /*
-     * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
-     *
-     * In clustered apic mode, phys_cpu_present_map is a constructed thus:
-     * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
-     * clustered apic ID.
-     */
-    Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
-
-    for (bit = 0; bit < NR_CPUS; bit++) {
-        apicid = cpu_present_to_apicid(bit);
-        /*
-         * Don't even attempt to start the boot CPU!
-         */
-        if (apicid == boot_cpu_apicid)
-            continue;
-
-        /* 
-         * Don't start hyperthreads if option noht requested.
-         */
-        if (opt_noht && (apicid & (ht_per_core - 1)))
-            continue;
-
-        if (!check_apicid_present(bit))
-            continue;
-        if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
-            continue;
-
-        do_boot_cpu(apicid);
-
-        /*
-         * Make sure we unmap all failed CPUs
-         */
-        if ((boot_apicid_to_cpu(apicid) == -1) &&
-            (!check_apicid_present(bit)))
-            printk("CPU #%d not responding - cannot use it.\n",
-                   apicid);
-    }
-
-    /*
-     * Cleanup possible dangling ends...
-     */
-    /*
-     * Install writable page 0 entry to set BIOS data area.
-     */
-    local_flush_tlb();
-
-    /*
-     * Paranoid:  Set warm reset code and vector here back
-     * to default values.
-     */
-    CMOS_WRITE(0, 0xf);
-
-    *((volatile long *) phys_to_virt(0x467)) = 0;
-
-    if (!cpucount) {
-        printk("Error: only one processor found.\n");
-    } else {
-        printk("Total of %d processors activated.\n", cpucount+1);
-    }
-    smp_num_cpus = cpucount + 1;
-
-    Dprintk("Boot done.\n");
-
-    /*
-     * Here we can be sure that there is an IO-APIC in the system. Let's
-     * go and set it up:
-     */
-    if ( nr_ioapics ) setup_IO_APIC();
-
-    /* Set up all local APIC timers in the system. */
-    {
-        extern void setup_APIC_clocks(void);
-        setup_APIC_clocks();
-    }
-
-    /* Synchronize the TSC with the AP(s). */
-    if ( cpucount ) synchronize_tsc_bp();
-
- smp_done:
-    ;
+       int apicid, cpu, bit, kicked;
+#ifdef BOGOMIPS
+       unsigned long bogosum = 0;
+#endif
+
+       /*
+        * Setup boot CPU information
+        */
+       smp_store_cpu_info(0); /* Final full version of the data */
+       printk("CPU%d: ", 0);
+       print_cpu_info(&cpu_data[0]);
+
+       boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+       boot_cpu_logical_apicid = logical_smp_processor_id();
+       x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+
+       /*current_thread_info()->cpu = 0;*/
+       smp_tune_scheduling();
+       cpus_clear(cpu_sibling_map[0]);
+       cpu_set(0, cpu_sibling_map[0]);
+
+       /*
+        * If we couldn't find an SMP configuration at boot time,
+        * get out of here now!
+        */
+       if (!smp_found_config && !acpi_lapic) {
+               printk(KERN_NOTICE "SMP motherboard not detected.\n");
+               smpboot_clear_io_apic_irqs();
+               phys_cpu_present_map = physid_mask_of_physid(0);
+               if (APIC_init_uniprocessor())
+                       printk(KERN_NOTICE "Local APIC not detected."
+                                          " Using dummy APIC emulation.\n");
+               map_cpu_to_logical_apicid();
+               return;
+       }
+
+       /*
+        * Should not be necessary because the MP table should list the boot
+        * CPU too, but we do it for the sake of robustness anyway.
+        * Makes no sense to do this check in clustered apic mode, so skip it
+        */
+       if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+               printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+                               boot_cpu_physical_apicid);
+               physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+       }
+
+       /*
+        * If we couldn't find a local APIC, then get out of here now!
+        */
+       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+                       boot_cpu_physical_apicid);
+               printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+               smpboot_clear_io_apic_irqs();
+               phys_cpu_present_map = physid_mask_of_physid(0);
+               return;
+       }
+
+       verify_local_APIC();
+
+       /*
+        * If SMP should be disabled, then really disable it!
+        */
+       if (!max_cpus) {
+               smp_found_config = 0;
+               printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+               smpboot_clear_io_apic_irqs();
+               phys_cpu_present_map = physid_mask_of_physid(0);
+               return;
+       }
+
+       connect_bsp_APIC();
+       setup_local_APIC();
+       map_cpu_to_logical_apicid();
+
+
+       setup_portio_remap();
+
+       /*
+        * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+        *
+        * In clustered apic mode, phys_cpu_present_map is a constructed thus:
+        * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
+        * clustered apic ID.
+        */
+       Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+
+       kicked = 1;
+       for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
+               apicid = cpu_present_to_apicid(bit);
+               /*
+                * Don't even attempt to start the boot CPU!
+                */
+               if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
+                       continue;
+
+               if (!check_apicid_present(bit))
+                       continue;
+               if (max_cpus <= cpucount+1)
+                       continue;
+
+               if (do_boot_cpu(apicid))
+                       printk("CPU #%d not responding - cannot use it.\n",
+                                                               apicid);
+               else
+                       ++kicked;
+       }
+
+       /*
+        * Cleanup possible dangling ends...
+        */
+       smpboot_restore_warm_reset_vector();
+
+#ifdef BOGOMIPS
+       /*
+        * Allow the user to impress friends.
+        */
+       Dprintk("Before bogomips.\n");
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               if (cpu_isset(cpu, cpu_callout_map))
+                       bogosum += cpu_data[cpu].loops_per_jiffy;
+       printk(KERN_INFO
+               "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+               cpucount+1,
+               bogosum/(500000/HZ),
+               (bogosum/(5000/HZ))%100);
+#else
+       printk("Total of %d processors activated.\n", cpucount+1);
+#endif
+       
+       Dprintk("Before bogocount - setting activated=1.\n");
+
+       if (smp_b_stepping)
+               printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+
+       /*
+        * Don't taint if we are running SMP kernel on a single non-MP
+        * approved Athlon
+        */
+       if (tainted & TAINT_UNSAFE_SMP) {
+               if (cpucount)
+                       printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
+               else
+                       tainted &= ~TAINT_UNSAFE_SMP;
+       }
+
+       Dprintk("Boot done.\n");
+
+       /*
+        * construct cpu_sibling_map[], so that we can tell sibling CPUs
+        * efficiently.
+        */
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               cpus_clear(cpu_sibling_map[cpu]);
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+               int siblings = 0;
+               int i;
+               if (!cpu_isset(cpu, cpu_callout_map))
+                       continue;
+
+               if (smp_num_siblings > 1) {
+                       for (i = 0; i < NR_CPUS; i++) {
+                               if (!cpu_isset(i, cpu_callout_map))
+                                       continue;
+                               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+                                       siblings++;
+                                       cpu_set(i, cpu_sibling_map[cpu]);
+                               }
+                       }
+               } else {
+                       siblings++;
+                       cpu_set(cpu, cpu_sibling_map[cpu]);
+               }
+
+               if (siblings != smp_num_siblings)
+                       printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+       }
+
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               check_nmi_watchdog();
+
+       smpboot_setup_io_apic();
+
+       setup_boot_APIC_clock();
+
+       /*
+        * Synchronize the TSC with the AP
+        */
+       if (cpu_has_tsc && cpucount && cpu_khz)
+               synchronize_tsc_bp();
 }
 
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
+/* These are wrappers to interface to the new boot process.  Someone
+   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+       smp_boot_cpus(max_cpus);
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+       cpu_set(smp_processor_id(), cpu_online_map);
+       cpu_set(smp_processor_id(), cpu_callout_map);
+}
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+       /* This only works at boot for x86.  See "rewrite" above. */
+       if (cpu_isset(cpu, smp_commenced_mask)) {
+               local_irq_enable();
+               return -ENOSYS;
+       }
+
+       /* In case one didn't come up */
+       if (!cpu_isset(cpu, cpu_callin_map)) {
+               local_irq_enable();
+               return -EIO;
+       }
+
+       local_irq_enable();
+       /* Unleash the CPU! */
+       cpu_set(cpu, smp_commenced_mask);
+       while (!cpu_isset(cpu, cpu_online_map))
+               mb();
+       return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#ifdef CONFIG_X86_IO_APIC
+       setup_ioapic_dest();
+#endif
+#ifdef CONFIG_X86_64
+       zap_low_mappings();
+#endif
+       /*
+        * Disable executability of the SMP trampoline:
+        */
+       set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+}
+
+#if 0
+void __init smp_intr_init(void)
+{
+       /*
+        * IRQ0 must be given a fixed assignment and initialized,
+        * because it's used before the IO-APIC is set up.
+        */
+       set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+       /*
+        * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+        * IPI, driven by wakeup.
+        */
+       set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+       /* IPI for invalidation */
+       set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+       /* IPI for generic function call */
+       set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+}
+#endif
index d9a6a5999f3b9a87e808844846223143b28b93d7..3e3b770ae484e0449c8415b43562760e16c54fe2 100644 (file)
@@ -37,7 +37,6 @@ unsigned long cpu_khz;  /* Detected as we calibrate the TSC */
 unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
 int timer_ack = 0;
-int do_timer_lists_from_pit = 0;
 unsigned long volatile jiffies;
 
 /* PRIVATE */
@@ -91,7 +90,7 @@ void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
     write_unlock_irq(&time_lock);
 
     /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
-    if ( do_timer_lists_from_pit )
+    if ( !cpu_has_apic )
         raise_softirq(AC_TIMER_SOFTIRQ);
 }
 
index fc2ee40d7b66d1426f7c6fc18132ff3b3bc40f1b..7907fe269df59b7e253dd5b1666491c70ee70150 100644 (file)
@@ -99,6 +99,7 @@ integer_param("debug_stack_lines", debug_stack_lines);
 
 static inline int kernel_text_address(unsigned long addr)
 {
+    extern char _stext, _etext;
     if (addr >= (unsigned long) &_stext &&
         addr <= (unsigned long) &_etext)
         return 1;
index f3a3b541ef80a3b16a9cc9bb5f04c4779b663b7b..7c814c8ec9a96f23a3ad4a870024b538a2816f4f 100644 (file)
 #include <xen/lib.h>
 #include <xen/trace.h>
 #include <xen/sched.h>
+#include <xen/irq.h>
 #include <xen/softirq.h>
 #include <asm/current.h>
 #include <asm/io.h>
-#include <asm/irq.h>
 #include <asm/shadow.h>
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
@@ -49,7 +49,7 @@ extern long evtchn_send(int lport);
 extern long do_block(void);
 void do_nmi(struct cpu_user_regs *, unsigned long);
 
-int start_vmx()
+int start_vmx(void)
 {
     struct vmcs_struct *vmcs;
     u32 ecx;
@@ -70,12 +70,14 @@ int start_vmx()
     if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
         if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
                 printk("VMX disabled by Feature Control MSR.\n");
-               return 0;
+                return 0;
         }
     }
-    else 
+    else {
         wrmsr(IA32_FEATURE_CONTROL_MSR, 
-              IA32_FEATURE_CONTROL_MSR_LOCK | IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
+              IA32_FEATURE_CONTROL_MSR_LOCK |
+              IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
+    }
 
     set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */
 
@@ -93,7 +95,7 @@ int start_vmx()
     return 1;
 }
 
-void stop_vmx()
+void stop_vmx(void)
 {
     if (read_cr4() & X86_CR4_VMXE)
         __vmxoff();
@@ -167,7 +169,7 @@ static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
     return result;
 }
 
-static void vmx_do_no_device_fault(
+static void vmx_do_no_device_fault(void)
 {
     unsigned long cr0;
         
index 4ffd4061fbe89ad958cd82033b5abbf0aa23387f..f46856fb2bab62c12534b2ba3e318b0600a57af7 100644 (file)
@@ -125,7 +125,7 @@ static int add_entry(struct ac_timer **heap, struct ac_timer *t)
         struct ac_timer **new_heap = xmalloc_array(struct ac_timer *, limit);
         if ( new_heap == NULL ) BUG();
         memcpy(new_heap, heap, (limit>>1)*sizeof(struct ac_timer *));
-        for ( i = 0; i < smp_num_cpus; i++ )
+        for ( i = 0; i < NR_CPUS; i++ )
             if ( ac_timers[i].heap == heap )
                 ac_timers[i].heap = new_heap;
         xfree(heap);
@@ -248,7 +248,7 @@ static void dump_timerq(unsigned char key)
     printk("Dumping ac_timer queues: NOW=0x%08X%08X\n",
            (u32)(now>>32), (u32)now); 
 
-    for ( i = 0; i < smp_num_cpus; i++ )
+    for_each_online_cpu( i )
     {
         printk("CPU[%02d] ", i);
         spin_lock_irqsave(&ac_timers[i].lock, flags);
@@ -270,7 +270,7 @@ void __init ac_timer_init(void)
 
     open_softirq(AC_TIMER_SOFTIRQ, ac_timer_softirq_action);
 
-    for ( i = 0; i < smp_num_cpus; i++ )
+    for ( i = 0; i < NR_CPUS; i++ )
     {
         ac_timers[i].heap = xmalloc_array(
             struct ac_timer *, DEFAULT_HEAP_LIMIT+1);
index df92bea13317365747c283c958278f5cbd2cea50..20cef35e29ec1c8d0303775e15d3d5ac346a9962 100644 (file)
@@ -155,7 +155,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
         unsigned int        pro;
         domid_t             dom;
         struct exec_domain *ed;
-        unsigned int        i, ht, cnt[NR_CPUS] = { 0 };
+        unsigned int        i, cnt[NR_CPUS] = { 0 };
 
 
         dom = op->u.createdomain.domain;
@@ -182,9 +182,8 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
          * domains will all share the second HT of each CPU. Since dom0 is on 
             * CPU 0, we favour high numbered CPUs in the event of a tie.
          */
-        ht = opt_noht ? 1 : ht_per_core;
-        pro = ht-1;
-        for ( i = pro; i < smp_num_cpus; i += ht )
+        pro = ht_per_core - 1;
+        for ( i = pro; i < num_online_cpus(); i += ht_per_core )
             if ( cnt[i] <= cnt[pro] )
                 pro = i;
 
@@ -269,7 +268,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
         else
         {
             /* pick a new cpu from the usable map */
-            int new_cpu = (int)find_first_set_bit(cpumap) % smp_num_cpus;
+            int new_cpu = (int)find_first_set_bit(cpumap) % num_online_cpus();
 
             exec_domain_pause(ed);
             if ( ed->processor != new_cpu )
index b7f104353cede56bc8d85b40f62f8ad0530ec3ab..835154051bd8b491d4872115fae130a47e79e264 100644 (file)
@@ -50,7 +50,10 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
 
-    if ( (d->domain_id != IDLE_DOMAIN_ID) &&
+    if ( d->domain_id == IDLE_DOMAIN_ID )
+        set_bit(_DOMF_idle_domain, &d->domain_flags);
+
+    if ( !is_idle_task(d) &&
          ((init_event_channels(d) != 0) || (grant_table_create(d) != 0)) )
     {
         destroy_event_channels(d);
@@ -62,7 +65,7 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
     
     sched_add_domain(ed);
 
-    if ( d->domain_id != IDLE_DOMAIN_ID )
+    if ( !is_idle_task(d) )
     {
         write_lock(&domlist_lock);
         pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
index 72b25bd0ea463682e650575d7734479620c156a7..5b388cafbf2ce6747297a02cb9e24a90247e60b2 100644 (file)
@@ -45,8 +45,8 @@ string_param("badpage", opt_badpage);
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 
-static spinlock_t page_scrub_lock;
-struct list_head page_scrub_list;
+static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED;
+LIST_HEAD(page_scrub_list);
 
 /*********************
  * ALLOCATION BITMAP
@@ -675,8 +675,6 @@ static void page_scrub_softirq(void)
 
 static __init int page_scrub_init(void)
 {
-    spin_lock_init(&page_scrub_lock);
-    INIT_LIST_HEAD(&page_scrub_list);
     open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
     return 0;
 }
index 157d49ffc8f0805146dcada2ed98ea1d71f8a4a9..7363fb98c7c9fe2cbc1b6a185e2e06a86da8e31f 100644 (file)
@@ -55,10 +55,11 @@ void perfc_printall(unsigned char key)
             break;
         case TYPE_CPU:
         case TYPE_S_CPU:
-            for ( j = sum = 0; j < smp_num_cpus; j++ )
+            sum = 0;
+            for_each_online_cpu ( j )
                 sum += atomic_read(&counters[j]);
             printk("TOTAL[%10d]  ", sum);
-            for ( j = 0; j < smp_num_cpus; j++ )
+            for_each_online_cpu ( j )
                 printk("CPU%02d[%10d]  ", j, atomic_read(&counters[j]));
             counters += NR_CPUS;
             break;
@@ -84,7 +85,7 @@ void perfc_printall(unsigned char key)
 
 void perfc_reset(unsigned char key)
 {
-    int i, j, sum;
+    int i, j;
     s_time_t now = NOW();
     atomic_t *counters = (atomic_t *)&perfcounters;
 
@@ -104,13 +105,13 @@ void perfc_reset(unsigned char key)
             counters += 1;
             break;
         case TYPE_CPU:
-            for ( j = sum = 0; j < smp_num_cpus; j++ )
+            for ( j = 0; j < NR_CPUS; j++ )
                 atomic_set(&counters[j],0);
         case TYPE_S_CPU:
             counters += NR_CPUS;
             break;
         case TYPE_ARRAY:
-            for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
+            for ( j = 0; j < NR_CPUS; j++ )
                 atomic_set(&counters[j],0);
         case TYPE_S_ARRAY:
             counters += perfc_info[i].nr_elements;
@@ -146,7 +147,7 @@ static int perfc_copy_info(dom0_perfc_desc_t *desc)
                 break;
             case TYPE_CPU:
             case TYPE_S_CPU:
-                perfc_d[i].nr_vals = smp_num_cpus;
+                perfc_d[i].nr_vals = num_online_cpus();
                 break;
             case TYPE_ARRAY:
             case TYPE_S_ARRAY:
index 1ad20578f47bfac997b385fd6ca59524f6118c8b..227804ebafd4c412d37d8f0110012eb340b351e5 100644 (file)
@@ -169,14 +169,19 @@ static inline u32 calc_evt(struct exec_domain *d, u32 avt)
 static int bvt_alloc_task(struct exec_domain *ed)
 {
     struct domain *d = ed->domain;
-    if ( (d->sched_priv == NULL) ) {
+
+    if ( (d->sched_priv == NULL) )
+    {
         if ( (d->sched_priv = xmalloc(struct bvt_dom_info)) == NULL )
             return -1;
         memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
     }
+
     ed->sched_priv = &BVT_INFO(d)->ed_inf[ed->vcpu_id];
+
     BVT_INFO(d)->ed_inf[ed->vcpu_id].inf = BVT_INFO(d);
     BVT_INFO(d)->ed_inf[ed->vcpu_id].exec_domain = ed;
+
     return 0;
 }
 
@@ -190,6 +195,15 @@ static void bvt_add_task(struct exec_domain *d)
     ASSERT(inf != NULL);
     ASSERT(d   != NULL);
 
+    /* Allocate per-CPU context if this is the first domain to be added. */
+    if ( CPU_INFO(d->processor) == NULL )
+    {
+        schedule_data[d->processor].sched_priv = xmalloc(struct bvt_cpu_info);
+        BUG_ON(CPU_INFO(d->processor) == NULL);
+        INIT_LIST_HEAD(RUNQUEUE(d->processor));
+        CPU_SVT(d->processor) = 0;
+    }
+
     if ( d->vcpu_id == 0 )
     {
         inf->mcu_advance = MCU_ADVANCE;
@@ -213,9 +227,11 @@ static void bvt_add_task(struct exec_domain *d)
 
     einf->exec_domain = d;
 
-    if ( d->domain->domain_id == IDLE_DOMAIN_ID )
+    if ( is_idle_task(d->domain) )
     {
         einf->avt = einf->evt = ~0U;
+        BUG_ON(__task_on_runqueue(d));
+        __add_to_runqueue_head(d);
     } 
     else 
     {
@@ -225,20 +241,6 @@ static void bvt_add_task(struct exec_domain *d)
     }
 }
 
-static int bvt_init_idle_task(struct exec_domain *ed)
-{
-    if ( bvt_alloc_task(ed) < 0 )
-        return -1;
-
-    bvt_add_task(ed);
-
-    set_bit(_VCPUF_running, &ed->vcpu_flags);
-    if ( !__task_on_runqueue(ed) )
-        __add_to_runqueue_head(ed);
-
-    return 0;
-}
-
 static void bvt_wake(struct exec_domain *ed)
 {
     struct bvt_edom_info *einf = EBVT_INFO(ed);
@@ -548,36 +550,11 @@ static void bvt_dump_cpu_state(int i)
     }
 }
 
-/* Initialise the data structures. */
-static int bvt_init_scheduler(void)
-{
-    int i;
-
-    for ( i = 0; i < NR_CPUS; i++ )
-    {
-        schedule_data[i].sched_priv = xmalloc(struct bvt_cpu_info);
-       
-        if ( schedule_data[i].sched_priv == NULL )
-        {
-            printk("Failed to allocate BVT scheduler per-CPU memory!\n");
-            return -1;
-        }
-
-        INIT_LIST_HEAD(RUNQUEUE(i));
-        
-        CPU_SVT(i) = 0; /* XXX do I really need to do this? */
-    }
-
-    return 0;
-}
-
 struct scheduler sched_bvt_def = {
     .name     = "Borrowed Virtual Time",
     .opt_name = "bvt",
     .sched_id = SCHED_BVT,
     
-    .init_scheduler = bvt_init_scheduler,
-    .init_idle_task = bvt_init_idle_task,
     .alloc_task     = bvt_alloc_task,
     .add_task       = bvt_add_task,
     .free_task      = bvt_free_task,
index d4ed67ed5b48c0cce7f41fe8647e60bd8b5b80b6..3ea2db152201caf2c13e40f66d4669713e4e7db2 100644 (file)
 #include <xen/time.h>
 #include <xen/slab.h>
 
-/*#include <xen/adv_sched_hist.h>*/
-
 /*verbosity settings*/
 #define SEDFLEVEL 0
 #define PRINT(_f, _a...)  \
-if ((_f)<=SEDFLEVEL) printk(_a );
+    if ((_f)<=SEDFLEVEL) printk(_a );
 
 #ifndef NDEBUG
-       #define SEDF_STATS
-       #define CHECK(_p) if ( !(_p) ) \
      { printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__,\
      __FILE__);}
+#define SEDF_STATS
+#define CHECK(_p) if ( !(_p) ) \
+ { printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__,\
+ __FILE__);}
 #else
-       #define CHECK(_p) ((void)0)
+#define CHECK(_p) ((void)0)
 #endif
 
 /*various ways of unblocking domains*/
@@ -64,72 +62,72 @@ if ((_f)<=SEDFLEVEL) printk(_a );
 
 
 struct sedf_dom_info {
-       struct domain           *domain;
+    struct domain  *domain;
 };
 struct sedf_edom_info
 {
-       struct exec_domain      *exec_domain;
-       struct list_head        list;
-       struct list_head        extralist[2];
-       
-       /*Parameters for EDF*/
-       s_time_t                period;         /*=(relative deadline)*/
-       s_time_t                slice;          /*=worst case execution time*/
-       
-       /*Advaced Parameters*/
-       /*Latency Scaling*/
-       s_time_t                period_orig;    
-       s_time_t                slice_orig;
-       s_time_t                latency;
-       
-       /*status of domain*/
-       int                     status;
-       /*weights for "Scheduling for beginners/ lazy/ etc." ;)*/
-       short                   weight;
-        short                   extraweight;
-        /*Bookkeeping*/
-       s_time_t                deadl_abs;
-       s_time_t                sched_start_abs;
-       s_time_t                cputime;
-       /* times the domain un-/blocked */
-       s_time_t                block_abs;
-       s_time_t                unblock_abs;
-       
-       /*scores for {util, block penalty}-weighted extratime distribution*/
-       int                     score[2];       
-       s_time_t                short_block_lost_tot;
-       
-       /*Statistics*/
-       s_time_t                extra_time_tot;
+    struct exec_domain *exec_domain;
+    struct list_head list;
+    struct list_head extralist[2];
+    /*Parameters for EDF*/
+    s_time_t  period;  /*=(relative deadline)*/
+    s_time_t  slice;  /*=worst case execution time*/
+    /*Advanced Parameters*/
+    /*Latency Scaling*/
+    s_time_t  period_orig; 
+    s_time_t  slice_orig;
+    s_time_t  latency;
+    /*status of domain*/
+    int   status;
+    /*weights for "Scheduling for beginners/ lazy/ etc." ;)*/
+    short   weight;
+    short                   extraweight;
+    /*Bookkeeping*/
+    s_time_t  deadl_abs;
+    s_time_t  sched_start_abs;
+    s_time_t  cputime;
+    /* times the domain un-/blocked */
+    s_time_t  block_abs;
+    s_time_t  unblock_abs;
+    /*scores for {util, block penalty}-weighted extratime distribution*/
+    int   score[2]; 
+    s_time_t  short_block_lost_tot;
+    /*Statistics*/
+    s_time_t  extra_time_tot;
 
 #ifdef SEDF_STATS
-       s_time_t                block_time_tot;
-       s_time_t                penalty_time_tot;
-       int                     block_tot;
-       int                     short_block_tot;
-       int                     long_block_tot;
-       int                     short_cont;
-       int                     pen_extra_blocks;
-       int                     pen_extra_slices;
+    s_time_t  block_time_tot;
+    s_time_t  penalty_time_tot;
+    int   block_tot;
+    int   short_block_tot;
+    int   long_block_tot;
+    int   short_cont;
+    int   pen_extra_blocks;
+    int   pen_extra_slices;
 #endif
 };
 
 struct sedf_cpu_info {
-       struct list_head runnableq;
-       struct list_head waitq;
-       struct list_head extraq[2];
+    struct list_head runnableq;
+    struct list_head waitq;
+    struct list_head extraq[2];
 };
 
-#define EDOM_INFO(d)           ((struct sedf_edom_info *)((d)->sched_priv))
-#define CPU_INFO(cpu)  ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
-#define LIST(d)                        (&EDOM_INFO(d)->list)
-#define EXTRALIST(d,i)         (&(EDOM_INFO(d)->extralist[i]))
-#define RUNQ(cpu)              (&CPU_INFO(cpu)->runnableq)
-#define WAITQ(cpu)             (&CPU_INFO(cpu)->waitq)
-#define EXTRAQ(cpu,i)                  (&(CPU_INFO(cpu)->extraq[i]))
-#define IDLETASK(cpu)          ((struct exec_domain *)schedule_data[cpu].idle)
+#define EDOM_INFO(d)  ((struct sedf_edom_info *)((d)->sched_priv))
+#define CPU_INFO(cpu) ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
+#define LIST(d)   (&EDOM_INFO(d)->list)
+#define EXTRALIST(d,i)  (&(EDOM_INFO(d)->extralist[i]))
+#define RUNQ(cpu)     (&CPU_INFO(cpu)->runnableq)
+#define WAITQ(cpu)     (&CPU_INFO(cpu)->waitq)
+#define EXTRAQ(cpu,i)    (&(CPU_INFO(cpu)->extraq[i]))
+#define IDLETASK(cpu)  ((struct exec_domain *)schedule_data[cpu].idle)
 
-#define PERIOD_BEGIN(inf)      ((inf)->deadl_abs - (inf)->period)
+#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
 
 #define MIN(x,y) (((x)<(y))?(x):(y))
 #define DIV_UP(x,y) (((x) + (y) - 1) / y)
@@ -142,8 +140,8 @@ struct sedf_cpu_info {
 static void sedf_dump_cpu_state(int i);
 
 static inline int extraq_on(struct exec_domain *d, int i) {
-       return ((EXTRALIST(d,i)->next != NULL) &&
-               (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
+    return ((EXTRALIST(d,i)->next != NULL) &&
+            (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
 }
 
 static inline void extraq_add_head(struct exec_domain *d, int i)
@@ -160,13 +158,13 @@ static inline void extraq_add_tail(struct exec_domain *d, int i)
 
 static inline void extraq_del(struct exec_domain *d, int i)
 {
-       struct list_head *list = EXTRALIST(d,i);
-       ASSERT(extraq_on(d,i));
-       PRINT(3, "Removing domain %i.%i from L%i extraq\n", d->domain->domain_id,
-          d->vcpu_id, i);      
-       list_del(list);
-       list->next = NULL;
-       ASSERT(!extraq_on(d, i));
+    struct list_head *list = EXTRALIST(d,i);
+    ASSERT(extraq_on(d,i));
+    PRINT(3, "Removing domain %i.%i from L%i extraq\n", d->domain->domain_id,
+          d->vcpu_id, i); 
+    list_del(list);
+    list->next = NULL;
+    ASSERT(!extraq_on(d, i));
 }
 
 /* adds a domain to the queue of processes which are aware of extra time. List
@@ -176,92 +174,92 @@ static inline void extraq_del(struct exec_domain *d, int i)
    charging each domain that recieved extratime with an inverse of its weight.
  */ 
 static inline void extraq_add_sort_update(struct exec_domain *d, int i, int sub) {
-       struct list_head      *cur;
-       struct sedf_edom_info *curinf;
-       
-       ASSERT(!extraq_on(d,i));
-       PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
-              " to L%i extraq\n",
-              d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
-              EDOM_INFO(d)->short_block_lost_tot, i);  
-       /*iterate through all elements to find our "hole" and on our way
-         update all the other scores*/
-       list_for_each(cur,EXTRAQ(d->processor,i)){
-               curinf = list_entry(cur,struct sedf_edom_info,extralist[i]);
-               curinf->score[i] -= sub;
-               if (EDOM_INFO(d)->score[i] < curinf->score[i])
-                       break;
-               else
-                       PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
-                             curinf->exec_domain->domain->domain_id,
-                             curinf->exec_domain->vcpu_id, curinf->score[i]);
-       }
-       /*cur now contains the element, before which we'll enqueue*/
-       PRINT(3, "\tlist_add to %p\n", cur->prev);
-       list_add(EXTRALIST(d,i),cur->prev);
-       
-       /*continue updating the extraq*/
-       if ((cur != EXTRAQ(d->processor,i)) && sub)
-               for (cur = cur->next; cur != EXTRAQ(d->processor,i);
-                    cur = cur-> next) {
-                       curinf = list_entry(cur,struct sedf_edom_info,
-                               extralist[i]);
-                       curinf->score[i] -= sub;
-                       PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
-                             curinf->exec_domain->domain->domain_id, 
-                             curinf->exec_domain->vcpu_id, curinf->score[i]);
-               }
-       ASSERT(extraq_on(d,i));
+    struct list_head      *cur;
+    struct sedf_edom_info *curinf;
+    ASSERT(!extraq_on(d,i));
+    PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
+          " to L%i extraq\n",
+          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
+          EDOM_INFO(d)->short_block_lost_tot, i); 
+    /*iterate through all elements to find our "hole" and on our way
+      update all the other scores*/
+    list_for_each(cur,EXTRAQ(d->processor,i)){
+        curinf = list_entry(cur,struct sedf_edom_info,extralist[i]);
+        curinf->score[i] -= sub;
+        if (EDOM_INFO(d)->score[i] < curinf->score[i])
+            break;
+        else
+            PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
+                  curinf->exec_domain->domain->domain_id,
+                  curinf->exec_domain->vcpu_id, curinf->score[i]);
+    }
+    /*cur now contains the element, before which we'll enqueue*/
+    PRINT(3, "\tlist_add to %p\n", cur->prev);
+    list_add(EXTRALIST(d,i),cur->prev);
+    /*continue updating the extraq*/
+    if ((cur != EXTRAQ(d->processor,i)) && sub)
+        for (cur = cur->next; cur != EXTRAQ(d->processor,i);
+             cur = cur-> next) {
+            curinf = list_entry(cur,struct sedf_edom_info,
+                                extralist[i]);
+            curinf->score[i] -= sub;
+            PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
+                  curinf->exec_domain->domain->domain_id, 
+                  curinf->exec_domain->vcpu_id, curinf->score[i]);
+        }
+    ASSERT(extraq_on(d,i));
 }
 static inline void extraq_check(struct exec_domain *d) {
-       if (extraq_on(d, EXTRA_UTIL_Q)) {
-               PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id);
-               if (!(EDOM_INFO(d)->status & EXTRA_AWARE) &&
-                   !extra_runs(EDOM_INFO(d))) {
-                       extraq_del(d, EXTRA_UTIL_Q);
-                       PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
-                             d->domain->domain_id, d->vcpu_id);
-               }
-       } else {
-               PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id,
-                     d->vcpu_id);
-               if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d))
-               {
-                       #if (EXTRA == EXTRA_ROUNDR)
-                       extraq_add_tail(d, EXTRA_UTIL_Q);
-                       #elif (EXTRA == EXTRA_SLICE_WEIGHT || \
-                              EXTRA == EXTRA_BLOCK_WEIGHT)
-                       extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
-                       #elif
-                       ;
-                       #endif
-                       PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id,
-                             d->vcpu_id);
-               }
-       }
+    if (extraq_on(d, EXTRA_UTIL_Q)) {
+        PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id);
+        if (!(EDOM_INFO(d)->status & EXTRA_AWARE) &&
+            !extra_runs(EDOM_INFO(d))) {
+            extraq_del(d, EXTRA_UTIL_Q);
+            PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
+                  d->domain->domain_id, d->vcpu_id);
+        }
+    } else {
+        PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id,
+              d->vcpu_id);
+        if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d))
+        {
+#if (EXTRA == EXTRA_ROUNDR)
+            extraq_add_tail(d, EXTRA_UTIL_Q);
+#elif (EXTRA == EXTRA_SLICE_WEIGHT || \
+          EXTRA == EXTRA_BLOCK_WEIGHT)
+            extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
+#elif
+            ;
+#endif
+            PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id,
+                  d->vcpu_id);
+        }
+    }
 }
 
 static inline void extraq_check_add_unblocked(struct exec_domain *d, 
-    int priority) {
-       struct sedf_edom_info *inf = EDOM_INFO(d);
-       if (inf->status & EXTRA_AWARE) 
-       #if (EXTRA == EXTRA_ROUNDR)
-               if (priority)
-                       extraq_add_head(d,EXTRA_UTIL_Q);
-               else
-                       extraq_add_tail(d,EXTRA_UTIL_Q);
-       #elif (EXTRA == EXTRA_SLICE_WEIGHT \
-           || EXTRA == EXTRA_BLOCK_WEIGHT)
-               /*put in on the weighted extraq, 
-                 without updating any scores*/
-               extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
-       #else
-               ;
-       #endif
+                                              int priority) {
+    struct sedf_edom_info *inf = EDOM_INFO(d);
+    if (inf->status & EXTRA_AWARE) 
+#if (EXTRA == EXTRA_ROUNDR)
+        if (priority)
+            extraq_add_head(d,EXTRA_UTIL_Q);
+        else
+            extraq_add_tail(d,EXTRA_UTIL_Q);
+#elif (EXTRA == EXTRA_SLICE_WEIGHT \
+     || EXTRA == EXTRA_BLOCK_WEIGHT)
+    /*put it on the weighted extraq, 
+    without updating any scores*/
+    extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
+#else
+    ;
+#endif
 }
 
 static inline int __task_on_queue(struct exec_domain *d) {
-       return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
+    return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
 }
 static inline void __del_from_queue(struct exec_domain *d)
 {
@@ -277,41 +275,41 @@ static inline void __del_from_queue(struct exec_domain *d)
 typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
 
 static inline void list_insert_sort(struct list_head *list,
-    struct list_head *element, list_comparer comp) {
-       struct list_head     *cur;
-       /*iterate through all elements to find our "hole"*/
-       list_for_each(cur,list){
-               if (comp(element, cur) < 0)
-                       break;
-       }
-       /*cur now contains the element, before which we'll enqueue*/
-       PRINT(3,"\tlist_add to %p\n",cur->prev);
-       list_add(element, cur->prev);
+                                    struct list_head *element, list_comparer comp) {
+    struct list_head     *cur;
+    /*iterate through all elements to find our "hole"*/
+    list_for_each(cur,list){
+        if (comp(element, cur) < 0)
+            break;
+    }
+    /*cur now contains the element, before which we'll enqueue*/
+    PRINT(3,"\tlist_add to %p\n",cur->prev);
+    list_add(element, cur->prev);
 }  
 #define DOMAIN_COMPARER(name, field, comp1, comp2)          \
 int name##_comp(struct list_head* el1, struct list_head* el2) \
 {                                                           \
      struct sedf_edom_info *d1, *d2;                     \
      d1 = list_entry(el1,struct sedf_edom_info, field);  \
      d2 = list_entry(el2,struct sedf_edom_info, field);  \
      if ((comp1) == (comp2))                             \
-               return 0;                                   \
      if ((comp1) < (comp2))                              \
-               return -1;                                  \
      else                                                \
-               return 1;                                   \
+ struct sedf_edom_info *d1, *d2;                     \
+ d1 = list_entry(el1,struct sedf_edom_info, field);  \
+ d2 = list_entry(el2,struct sedf_edom_info, field);  \
+ if ((comp1) == (comp2))                             \
+  return 0;                                   \
+ if ((comp1) < (comp2))                              \
+  return -1;                                  \
+ else                                                \
+  return 1;                                   \
 }
 /* adds a domain to the queue of processes which wait for the beginning of the
    next period; this list is therefore sortet by this time, which is simply
    absol. deadline - period
  */ 
 DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2))
-static inline void __add_to_waitqueue_sort(struct exec_domain *d) {
-       ASSERT(!__task_on_queue(d));
-       PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
-              d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
-       list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp);
-       ASSERT(__task_on_queue(d));
+    static inline void __add_to_waitqueue_sort(struct exec_domain *d) {
+    ASSERT(!__task_on_queue(d));
+    PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
+          d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
+    list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp);
+    ASSERT(__task_on_queue(d));
 }
 
 /* adds a domain to the queue of processes which have started their current
@@ -320,247 +318,228 @@ static inline void __add_to_waitqueue_sort(struct exec_domain *d) {
    task will run. As we are implementing EDF, this list is sorted by deadlines.
  */ 
 DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs)
-static inline void __add_to_runqueue_sort(struct exec_domain *d) {
-       PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
-              d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs);
-       list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
-}
-
-/* Initialises the queues */
-static int sedf_init_scheduler() {
-       int i;
-       PRINT(2,"sedf_init_scheduler was called\n");
-       
-       for ( i = 0; i < NR_CPUS; i++ ) {
-               schedule_data[i].sched_priv = 
-                       xmalloc(struct sedf_cpu_info);
-               if ( schedule_data[i].sched_priv == NULL )
-                       return -1;
-               INIT_LIST_HEAD(WAITQ(i));
-               INIT_LIST_HEAD(RUNQ(i));
-               INIT_LIST_HEAD(EXTRAQ(i,EXTRA_PEN_Q));
-               INIT_LIST_HEAD(EXTRAQ(i,EXTRA_UTIL_Q));
-       }
-       return 0;   
+    static inline void __add_to_runqueue_sort(struct exec_domain *d) {
+    PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
+          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs);
+    list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
 }
 
 /* Allocates memory for per domain private scheduling data*/
 static int sedf_alloc_task(struct exec_domain *d) {
-       PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
-             d->vcpu_id);
-       if (d->domain->sched_priv == NULL) {
-               if ((d->domain->sched_priv = 
-                    xmalloc(struct sedf_dom_info)) == NULL )
-               return -1;
-               memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
-       }
-       if ((d->sched_priv = xmalloc(struct sedf_edom_info)) == NULL )
-               return -1;
-       memset(d->sched_priv, 0, sizeof(struct sedf_edom_info));
-       return 0;
+    PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
+          d->vcpu_id);
+    if (d->domain->sched_priv == NULL) {
+        if ((d->domain->sched_priv = 
+             xmalloc(struct sedf_dom_info)) == NULL )
+            return -1;
+        memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
+    }
+    if ((d->sched_priv = xmalloc(struct sedf_edom_info)) == NULL )
+        return -1;
+    memset(d->sched_priv, 0, sizeof(struct sedf_edom_info));
+    return 0;
 }
 
 /* Setup the sedf_dom_info */
 static void sedf_add_task(struct exec_domain *d)
 {
-       struct sedf_edom_info *inf = EDOM_INFO(d);
-       inf->exec_domain = d;
-       
-       PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",d->domain->domain_id,
-             d->vcpu_id);
-             
-       if (d->domain->domain_id==0) {
-               /*set dom0 to something useful to boot the machine*/
-               inf->period    = MILLISECS(20);
-               inf->slice     = MILLISECS(15);
-               inf->latency   = 0;
-               inf->deadl_abs = 0;
-               inf->status     = EXTRA_NONE | SEDF_ASLEEP;/*EXTRA_AWARE; */
-       }
-       else {
-               /*other domains run in best effort mode*/
-               inf->period    = WEIGHT_PERIOD;
-               inf->slice     = 0;
-               inf->deadl_abs = 0;
-               inf->latency   = 0;
-               inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
-               inf->extraweight = 1;
-       }
-       inf->period_orig = inf->period; inf->slice_orig = inf->slice;
-       INIT_LIST_HEAD(&(inf->list));
-       INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
-       INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
-       
-       if (d->domain->domain_id != IDLE_DOMAIN_ID) {
-               extraq_check(d);
-       }
+    struct sedf_edom_info *inf = EDOM_INFO(d);
+    inf->exec_domain = d;
+    PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",d->domain->domain_id,
+          d->vcpu_id);
+
+    /* Allocate per-CPU context if this is the first domain to be added. */
+    if ( schedule_data[d->processor].sched_priv == NULL )
+    {
+        schedule_data[d->processor].sched_priv = 
+            xmalloc(struct sedf_cpu_info);
+        BUG_ON(schedule_data[d->processor].sched_priv == NULL);
+        INIT_LIST_HEAD(WAITQ(d->processor));
+        INIT_LIST_HEAD(RUNQ(d->processor));
+        INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_PEN_Q));
+        INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
+    }
+       
+    if (d->domain->domain_id==0) {
+        /*set dom0 to something useful to boot the machine*/
+        inf->period    = MILLISECS(20);
+        inf->slice     = MILLISECS(15);
+        inf->latency   = 0;
+        inf->deadl_abs = 0;
+        inf->status     = EXTRA_NONE | SEDF_ASLEEP;/*EXTRA_AWARE; */
+    } else {
+        /*other domains run in best effort mode*/
+        inf->period    = WEIGHT_PERIOD;
+        inf->slice     = 0;
+        inf->deadl_abs = 0;
+        inf->latency   = 0;
+        inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
+        inf->extraweight = 1;
+    }
+    inf->period_orig = inf->period; inf->slice_orig = inf->slice;
+    INIT_LIST_HEAD(&(inf->list));
+    INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
+    INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
+    if (!is_idle_task(d->domain)) {
+        extraq_check(d);
+    } else {
+        EDOM_INFO(d)->deadl_abs = 0;
+        EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
+    }
 }
 
 /* Frees memory used by domain info */
 static void sedf_free_task(struct domain *d)
 {
-       int i;
-       PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
-       ASSERT(d->sched_priv != NULL);
-       xfree(d->sched_priv);
-       
-       for (i = 0; i < MAX_VIRT_CPUS; i++)
-               if ( d->exec_domain[i] ) {
-                       ASSERT(d->exec_domain[i]->sched_priv != NULL);
-                       xfree(d->exec_domain[i]->sched_priv);
-               }
-}
-
-/* Initialises idle task */
-static int sedf_init_idle_task(struct exec_domain *d) {
-       PRINT(2,"sedf_init_idle_task was called, domain-id %i.%i\n",
-             d->domain->domain_id, d->vcpu_id);
-       if ( sedf_alloc_task(d) < 0 )
-               return -1;
-       
-       sedf_add_task(d);
-       EDOM_INFO(d)->deadl_abs = 0;
-       EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
-       set_bit(_VCPUF_running, &d->vcpu_flags);
-       /*the idle task doesn't have to turn up on any list...*/
-       return 0;
+    int i;
+    PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
+    ASSERT(d->sched_priv != NULL);
+    xfree(d->sched_priv);
+    for (i = 0; i < MAX_VIRT_CPUS; i++)
+        if ( d->exec_domain[i] ) {
+            ASSERT(d->exec_domain[i]->sched_priv != NULL);
+            xfree(d->exec_domain[i]->sched_priv);
+        }
 }
 
 /* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
 static inline void desched_edf_dom (s_time_t now, struct exec_domain* d) {
-       struct sedf_edom_info* inf = EDOM_INFO(d);
-       /*current domain is running in real time mode*/
-       
-       ASSERT(__task_on_queue(d));
-       /*update the domains cputime*/
-       inf->cputime += now - inf->sched_start_abs;
+    struct sedf_edom_info* inf = EDOM_INFO(d);
+    /*current domain is running in real time mode*/
+    ASSERT(__task_on_queue(d));
+    /*update the domains cputime*/
+    inf->cputime += now - inf->sched_start_abs;
 
-       /*scheduling decisions, which don't remove the running domain
-         from the runq*/
-       if ((inf->cputime < inf->slice) && sedf_runnable(d))
-               return;
-               
-       __del_from_queue(d);
-               
-       /*manage bookkeeping (i.e. calculate next deadline,
-         memorize overun-time of slice) of finished domains*/
-       if (inf->cputime >= inf->slice) {
-               inf->cputime -= inf->slice;
-               
-               if (inf->period < inf->period_orig) {
-                       /*this domain runs in latency scaling or burst mode*/
-                       #if (UNBLOCK == UNBLOCK_BURST)
-                       /*if we are runnig in burst scaling wait for two periods
-                         before scaling periods up again*/ 
-                       if (now - inf->unblock_abs >= 2 * inf->period)
-                       #endif
-                       {
-                               inf->period *= 2; inf->slice *= 2;
-                               if ((inf->period > inf->period_orig) ||
-                                   (inf->slice > inf->slice_orig)) {
-                                       /*reset slice & period*/
-                                       inf->period = inf->period_orig;
-                                       inf->slice = inf->slice_orig;
-                               }
-                       }
-               }
-               /*set next deadline*/
-               inf->deadl_abs += inf->period;
-       }
-       
-       /*add a runnable domain to the waitqueue*/
-       if (sedf_runnable(d))
-               __add_to_waitqueue_sort(d);
-       else {
-               /*we have a blocked realtime task -> remove it from exqs too*/
-               #if (EXTRA > EXTRA_OFF)
-               #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-               if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
-               #endif
-               if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
-               #endif
-       }
-       ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
-       ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
-         sedf_runnable(d)));
+    /*scheduling decisions, which don't remove the running domain
+      from the runq*/
+    if ((inf->cputime < inf->slice) && sedf_runnable(d))
+        return;
+  
+    __del_from_queue(d);
+  
+    /*manage bookkeeping (i.e. calculate next deadline,
+      memorize overrun-time of slice) of finished domains*/
+    if (inf->cputime >= inf->slice) {
+        inf->cputime -= inf->slice;
+  
+        if (inf->period < inf->period_orig) {
+            /*this domain runs in latency scaling or burst mode*/
+#if (UNBLOCK == UNBLOCK_BURST)
+            /*if we are running in burst scaling wait for two periods
+              before scaling periods up again*/ 
+            if (now - inf->unblock_abs >= 2 * inf->period)
+#endif
+            {
+                inf->period *= 2; inf->slice *= 2;
+                if ((inf->period > inf->period_orig) ||
+                    (inf->slice > inf->slice_orig)) {
+                    /*reset slice & period*/
+                    inf->period = inf->period_orig;
+                    inf->slice = inf->slice_orig;
+                }
+            }
+        }
+        /*set next deadline*/
+        inf->deadl_abs += inf->period;
+    }
+    /*add a runnable domain to the waitqueue*/
+    if (sedf_runnable(d))
+        __add_to_waitqueue_sort(d);
+    else {
+        /*we have a blocked realtime task -> remove it from exqs too*/
+#if (EXTRA > EXTRA_OFF)
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+        if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
+#endif
+        if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
+#endif
+    }
+    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
+    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
+                 sedf_runnable(d)));
 }
 
 /* Update all elements on the queues */
 static inline void update_queues(s_time_t now, struct list_head* runq, 
-struct list_head* waitq) {
-       struct list_head     *cur,*tmp;
-       struct sedf_edom_info *curinf;
-       
-       PRINT(3,"Updating waitq..\n");
-       /*check for the first elements of the waitqueue, whether their
-         next period has already started*/
-       list_for_each_safe(cur, tmp, waitq) {
-               curinf = list_entry(cur, struct sedf_edom_info, list);
-               PRINT(4,"\tLooking @ dom %i.%i\n",
-                     curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
-               if (PERIOD_BEGIN(curinf) <= now) {
-                       __del_from_queue(curinf->exec_domain);
-                       __add_to_runqueue_sort(curinf->exec_domain);
-               }
-               else
-                       break;
-       }
-       
-       PRINT(3,"Updating runq..\n");
-       /*process the runq, find domains that are on
-         the runqueue which shouldn't be there*/
-       list_for_each_safe(cur, tmp, runq) {
-               curinf = list_entry(cur,struct sedf_edom_info,list);
-               PRINT(4,"\tLooking @ dom %i.%i\n",
-                     curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
-               if (unlikely(curinf->slice == 0)) {
-                       /*ignore domains with empty slice*/
-                       PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
-                             curinf->exec_domain->domain->domain_id,
-                             curinf->exec_domain->vcpu_id);
-                       __del_from_queue(curinf->exec_domain);
-                       
-                       /*move them to their next period*/
-                       curinf->deadl_abs += curinf->period;
-                       /*and put them back into the queue*/
-                       __add_to_waitqueue_sort(curinf->exec_domain);
-                       continue;
-               }
-               if (unlikely((curinf->deadl_abs < now) ||
-                       (curinf->cputime > curinf->slice))) {
-                       /*we missed the deadline or the slice was
-                               already finished... might hapen because
-                               of dom_adj.*/
-                       PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
-                               "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
-                               " cputime: %"PRIu64"\n",
-                               curinf->exec_domain->domain->domain_id,
-                               curinf->exec_domain->vcpu_id,
-                               curinf->deadl_abs, curinf->slice, now,
-                               curinf->cputime);
-                       __del_from_queue(curinf->exec_domain);
-                       /*common case: we miss one period!*/
-                       curinf->deadl_abs += curinf->period;
-                       
-                       /*if we are still behind: modulo arithmetic,
-                               force deadline to be in future and
-                               aligned to period borders!*/
-                       if (unlikely(curinf->deadl_abs < now))
-                               curinf->deadl_abs += 
-                                       DIV_UP(now - curinf->deadl_abs,
-                                       curinf->period) * curinf->period;
-                       ASSERT(curinf->deadl_abs > now);
-                       /*give a fresh slice*/
-                       curinf->cputime = 0;
-                       if (PERIOD_BEGIN(curinf) > now)
-                               __add_to_waitqueue_sort(curinf->exec_domain);
-                       else
-                               __add_to_runqueue_sort(curinf->exec_domain);
-               }
-               else
-                       break;
-       }
-       PRINT(3,"done updating the queues\n");
+                                 struct list_head* waitq) {
+    struct list_head     *cur,*tmp;
+    struct sedf_edom_info *curinf;
+    PRINT(3,"Updating waitq..\n");
+    /*check for the first elements of the waitqueue, whether their
+      next period has already started*/
+    list_for_each_safe(cur, tmp, waitq) {
+        curinf = list_entry(cur, struct sedf_edom_info, list);
+        PRINT(4,"\tLooking @ dom %i.%i\n",
+              curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
+        if (PERIOD_BEGIN(curinf) <= now) {
+            __del_from_queue(curinf->exec_domain);
+            __add_to_runqueue_sort(curinf->exec_domain);
+        }
+        else
+            break;
+    }
+    PRINT(3,"Updating runq..\n");
+    /*process the runq, find domains that are on
+      the runqueue which shouldn't be there*/
+    list_for_each_safe(cur, tmp, runq) {
+        curinf = list_entry(cur,struct sedf_edom_info,list);
+        PRINT(4,"\tLooking @ dom %i.%i\n",
+              curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
+        if (unlikely(curinf->slice == 0)) {
+            /*ignore domains with empty slice*/
+            PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
+                  curinf->exec_domain->domain->domain_id,
+                  curinf->exec_domain->vcpu_id);
+            __del_from_queue(curinf->exec_domain);
+   
+            /*move them to their next period*/
+            curinf->deadl_abs += curinf->period;
+            /*and put them back into the queue*/
+            __add_to_waitqueue_sort(curinf->exec_domain);
+            continue;
+        }
+        if (unlikely((curinf->deadl_abs < now) ||
+                     (curinf->cputime > curinf->slice))) {
+            /*we missed the deadline or the slice was
+              already finished... might hapen because
+              of dom_adj.*/
+            PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
+                  "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
+                  " cputime: %"PRIu64"\n",
+                  curinf->exec_domain->domain->domain_id,
+                  curinf->exec_domain->vcpu_id,
+                  curinf->deadl_abs, curinf->slice, now,
+                  curinf->cputime);
+            __del_from_queue(curinf->exec_domain);
+            /*common case: we miss one period!*/
+            curinf->deadl_abs += curinf->period;
+   
+            /*if we are still behind: modulo arithmetic,
+              force deadline to be in future and
+              aligned to period borders!*/
+            if (unlikely(curinf->deadl_abs < now))
+                curinf->deadl_abs += 
+                    DIV_UP(now - curinf->deadl_abs,
+                           curinf->period) * curinf->period;
+            ASSERT(curinf->deadl_abs > now);
+            /*give a fresh slice*/
+            curinf->cputime = 0;
+            if (PERIOD_BEGIN(curinf) > now)
+                __add_to_waitqueue_sort(curinf->exec_domain);
+            else
+                __add_to_runqueue_sort(curinf->exec_domain);
+        }
+        else
+            break;
+    }
+    PRINT(3,"done updating the queues\n");
 }
 
 #if (EXTRA > EXTRA_OFF)
@@ -571,140 +550,140 @@ struct list_head* waitq) {
    if the domain is blocked / has regained its short-block-loss
    time it is not put on any queue */
 static inline void desched_extra_dom(s_time_t now, struct exec_domain* d) {
-       struct sedf_edom_info   *inf = EDOM_INFO(d);
-       int                     i    = extra_get_cur_q(inf);
-       
+    struct sedf_edom_info *inf = EDOM_INFO(d);
+    int    i    = extra_get_cur_q(inf);
 #if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
-       unsigned long         oldscore;
+    unsigned long         oldscore;
 #endif
-       ASSERT(extraq_on(d, i));
-       /*unset all running flags*/
-       inf->status  &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
-       /*fresh slice for the next run*/
-       inf->cputime = 0;
-       /*accumulate total extratime*/
-       inf->extra_time_tot += now - inf->sched_start_abs;
-       /*remove extradomain from head of the queue*/
-       extraq_del(d, i);
+    ASSERT(extraq_on(d, i));
+    /*unset all running flags*/
+    inf->status  &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
+    /*fresh slice for the next run*/
+    inf->cputime = 0;
+    /*accumulate total extratime*/
+    inf->extra_time_tot += now - inf->sched_start_abs;
+    /*remove extradomain from head of the queue*/
+    extraq_del(d, i);
 
 #if (EXTRA == EXTRA_ROUNDR)
-       if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
-               /*add to the tail if it is runnable => round-robin*/
-               extraq_add_tail(d, EXTRA_UTIL_Q);
+    if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
+        /*add to the tail if it is runnable => round-robin*/
+        extraq_add_tail(d, EXTRA_UTIL_Q);
 #elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
-       /*update the score*/
-       oldscore      = inf->score[i];
+    /*update the score*/
+    oldscore      = inf->score[i];
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-       if (i == EXTRA_PEN_Q) {
-               /*domain was running in L0 extraq*/
-               /*reduce block lost, probably more sophistication here!*/
-               /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
-               inf->short_block_lost_tot -= now - inf->sched_start_abs;
-               PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n", 
-                     inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id,
-                     inf->short_block_lost_tot);
-               if (inf->short_block_lost_tot <= 0) {
-                       PRINT(4,"Domain %i.%i compensated short block loss!\n",
-                         inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id);
-                       /*we have (over-)compensated our block penalty*/
-                       inf->short_block_lost_tot = 0;
-                       /*we don't want a place on the penalty queue anymore!*/
-                       inf->status &= ~EXTRA_WANT_PEN_Q;
-                       goto check_extra_queues;
-               }
-               /*we have to go again for another try in the block-extraq,
-                 the score is not used incremantally here, as this is
-                 already done by recalculating the block_lost*/
-               inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
-                                         inf->short_block_lost_tot;
-               oldscore = 0;
-       } else
+    if (i == EXTRA_PEN_Q) {
+        /*domain was running in L0 extraq*/
+        /*reduce block lost, probably more sophistication here!*/
+        /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
+        inf->short_block_lost_tot -= now - inf->sched_start_abs;
+        PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n", 
+              inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id,
+              inf->short_block_lost_tot);
+        if (inf->short_block_lost_tot <= 0) {
+            PRINT(4,"Domain %i.%i compensated short block loss!\n",
+                  inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id);
+            /*we have (over-)compensated our block penalty*/
+            inf->short_block_lost_tot = 0;
+            /*we don't want a place on the penalty queue anymore!*/
+            inf->status &= ~EXTRA_WANT_PEN_Q;
+            goto check_extra_queues;
+        }
+        /*we have to go again for another try in the block-extraq,
+          the score is not used incremantally here, as this is
+          already done by recalculating the block_lost*/
+        inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
+            inf->short_block_lost_tot;
+        oldscore = 0;
+    } else
 #endif
-       {
-               /*domain was running in L1 extraq => score is inverse of
-                 utilization and is used somewhat incremental!*/
-               if (!inf->extraweight)
-                       /*NB: use fixed point arithmetic with 10 bits*/
-                       inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
-                                                   inf->slice;
-               else
-                       /*give a domain w/ exweight = 1 as much as a domain with
-                         util = 1/128*/
-                       inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
-       }
-check_extra_queues:
-       /* Adding a runnable domain to the right queue and removing blocked ones*/
-       if (sedf_runnable(d)) {
-               /*add according to score: weighted round robin*/
-               if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q))
-                       extraq_add_sort_update(d, i, oldscore);
-       }
-       else {
-               /*remove this blocked domain from the waitq!*/
-               __del_from_queue(d);
+    {
+        /*domain was running in L1 extraq => score is inverse of
+          utilization and is used somewhat incremental!*/
+        if (!inf->extraweight)
+            /*NB: use fixed point arithmetic with 10 bits*/
+            inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
+                inf->slice;
+        else
+            /*give a domain w/ exweight = 1 as much as a domain with
+              util = 1/128*/
+            inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
+    }
+ check_extra_queues:
+    /* Adding a runnable domain to the right queue and removing blocked ones*/
+    if (sedf_runnable(d)) {
+        /*add according to score: weighted round robin*/
+        if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q))
+            extraq_add_sort_update(d, i, oldscore);
+    }
+    else {
+        /*remove this blocked domain from the waitq!*/
+        __del_from_queue(d);
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-               /*make sure that we remove a blocked domain from the other
-                 extraq too*/
-               if (i == EXTRA_PEN_Q) {
-                       if (extraq_on(d, EXTRA_UTIL_Q))
-                               extraq_del(d, EXTRA_UTIL_Q);
-               }
-               else {
-                       if (extraq_on(d, EXTRA_PEN_Q))
-                               extraq_del(d, EXTRA_PEN_Q);
-               }
+        /*make sure that we remove a blocked domain from the other
+          extraq too*/
+        if (i == EXTRA_PEN_Q) {
+            if (extraq_on(d, EXTRA_UTIL_Q))
+                extraq_del(d, EXTRA_UTIL_Q);
+        }
+        else {
+            if (extraq_on(d, EXTRA_PEN_Q))
+                extraq_del(d, EXTRA_PEN_Q);
+        }
 #endif
-       }
+    }
 #endif
-       ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
-       ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
-         sedf_runnable(d)));
+    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
+    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
+                 sedf_runnable(d)));
 }
 #endif
 
 static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
-    s_time_t end_xt, struct list_head *extraq[], int cpu) {
-       struct task_slice               ret;
-       struct sedf_edom_info   *runinf;
-       
-       /* Enough time left to use for extratime? */
-       if (end_xt - now < EXTRA_QUANTUM)
-               goto return_idle;
+                                                        s_time_t end_xt, struct list_head *extraq[], int cpu) {
+    struct task_slice   ret;
+    struct sedf_edom_info *runinf;
+    /* Enough time left to use for extratime? */
+    if (end_xt - now < EXTRA_QUANTUM)
+        goto return_idle;
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-       if (!list_empty(extraq[EXTRA_PEN_Q])) {
-               /*we still have elements on the level 0 extraq 
-                 => let those run first!*/
-               runinf   = list_entry(extraq[EXTRA_PEN_Q]->next, 
-                             struct sedf_edom_info, extralist[EXTRA_PEN_Q]);
-               runinf->status |= EXTRA_RUN_PEN;
-               ret.task = runinf->exec_domain;
-               ret.time = EXTRA_QUANTUM;
+    if (!list_empty(extraq[EXTRA_PEN_Q])) {
+        /*we still have elements on the level 0 extraq 
+          => let those run first!*/
+        runinf   = list_entry(extraq[EXTRA_PEN_Q]->next, 
+                              struct sedf_edom_info, extralist[EXTRA_PEN_Q]);
+        runinf->status |= EXTRA_RUN_PEN;
+        ret.task = runinf->exec_domain;
+        ret.time = EXTRA_QUANTUM;
 #ifdef SEDF_STATS
-               runinf->pen_extra_slices++;
+        runinf->pen_extra_slices++;
 #endif
-       } else
+    } else
 #endif
-       if (!list_empty(extraq[EXTRA_UTIL_Q])) {
-               /*use elements from the normal extraqueue*/
-               runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
-                             struct sedf_edom_info, extralist[EXTRA_UTIL_Q]);
-               runinf->status |= EXTRA_RUN_UTIL;
-               ret.task = runinf->exec_domain;
-               ret.time = EXTRA_QUANTUM;
-       }
-       else
-               goto return_idle;
+        if (!list_empty(extraq[EXTRA_UTIL_Q])) {
+            /*use elements from the normal extraqueue*/
+            runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
+                                  struct sedf_edom_info, extralist[EXTRA_UTIL_Q]);
+            runinf->status |= EXTRA_RUN_UTIL;
+            ret.task = runinf->exec_domain;
+            ret.time = EXTRA_QUANTUM;
+        }
+        else
+            goto return_idle;
 
-       ASSERT(ret.time > 0);
-       ASSERT(sedf_runnable(ret.task));
-       return ret;
-       
-return_idle:
-       ret.task = IDLETASK(cpu);
-       ret.time = end_xt - now;
-       ASSERT(ret.time > 0);
-       ASSERT(sedf_runnable(ret.task));
-       return ret;
+    ASSERT(ret.time > 0);
+    ASSERT(sedf_runnable(ret.task));
+    return ret;
+ return_idle:
+    ret.task = IDLETASK(cpu);
+    ret.time = end_xt - now;
+    ASSERT(ret.time > 0);
+    ASSERT(sedf_runnable(ret.task));
+    return ret;
 }
 /* Main scheduling function
    Reasons for calling this function are:
@@ -713,126 +692,123 @@ return_idle:
    -and various others ;) in general: determine which domain to run next*/
 static struct task_slice sedf_do_schedule(s_time_t now)
 {
-       int                   cpu      = current->processor;
-       struct list_head     *runq     = RUNQ(cpu);
-       struct list_head     *waitq    = WAITQ(cpu);
-       #if (EXTRA > EXTRA_OFF)
-       struct sedf_edom_info *inf     = EDOM_INFO(current);
-       struct list_head     *extraq[] = {EXTRAQ(cpu, EXTRA_PEN_Q),
-                                         EXTRAQ(cpu, EXTRA_UTIL_Q)};
-       #endif
-       struct task_slice          ret;
-       /*int i = 0;*/
-       /*idle tasks don't need any of the following stuf*/
-       if (is_idle_task(current->domain))
-               goto check_waitq;
-       
-       /* create local state of the status of the domain, in order to avoid
-          inconsistent state during scheduling decisions, because data for
-          domain_runnable is not protected by the scheduling lock!*/
-       if(!domain_runnable(current))
-               inf->status |= SEDF_ASLEEP;
-       
-       if (inf->status & SEDF_ASLEEP)
-               inf->block_abs = now;
+    int                   cpu      = current->processor;
+    struct list_head     *runq     = RUNQ(cpu);
+    struct list_head     *waitq    = WAITQ(cpu);
+#if (EXTRA > EXTRA_OFF)
+    struct sedf_edom_info *inf     = EDOM_INFO(current);
+    struct list_head     *extraq[] = {EXTRAQ(cpu, EXTRA_PEN_Q),
+                                      EXTRAQ(cpu, EXTRA_UTIL_Q)};
+#endif
+    struct task_slice          ret;
+    /*int i = 0;*/
+    /*idle tasks don't need any of the following stuf*/
+    if (is_idle_task(current->domain))
+        goto check_waitq;
+    /* create local state of the status of the domain, in order to avoid
+       inconsistent state during scheduling decisions, because data for
+       domain_runnable is not protected by the scheduling lock!*/
+    if(!domain_runnable(current))
+        inf->status |= SEDF_ASLEEP;
+    if (inf->status & SEDF_ASLEEP)
+        inf->block_abs = now;
 
-       #if (EXTRA > EXTRA_OFF)
-       if (unlikely(extra_runs(inf))) {
-               /*special treatment of domains running in extra time*/
-               desched_extra_dom(now, current);
-       }
-       else 
-       #endif
-       {
-               desched_edf_dom(now, current);
-       }
-check_waitq:
-       update_queues(now, runq, waitq);
-       
-       /*now simply pick the first domain from the runqueue, which has the
-         earliest deadline, because the list is sorted*/
-       struct sedf_edom_info *runinf, *waitinf;
-       
-       if (!list_empty(runq)) {
-               runinf   = list_entry(runq->next,struct sedf_edom_info,list);
-               ret.task = runinf->exec_domain;
-               if (!list_empty(waitq)) {
-                       waitinf  = list_entry(waitq->next,
-                                      struct sedf_edom_info,list);
-                       /*rerun scheduler, when scheduled domain reaches it's
-                         end of slice or the first domain from the waitqueue
-                         gets ready*/
-                       ret.time = MIN(now + runinf->slice - runinf->cputime,
-                                      PERIOD_BEGIN(waitinf)) - now;
-               }
-               else {
-                       ret.time = runinf->slice - runinf->cputime;
-               }
-               CHECK(ret.time > 0);
-               goto sched_done;
-       }
-       
-       if (!list_empty(waitq)) {
-               waitinf  = list_entry(waitq->next,struct sedf_edom_info, list);
-               /*we could not find any suitable domain 
-                 => look for domains that are aware of extratime*/
-               #if (EXTRA > EXTRA_OFF)
-               ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
-                                            extraq, cpu);
-               #else
-               ret.task = IDLETASK(cpu);
-               ret.time = PERIOD_BEGIN(waitinf) - now;
-               #endif
-               CHECK(ret.time > 0);
-       }
-       else {
-               /*this could probably never happen, but one never knows...*/
-               /*it can... imagine a second CPU, which is pure scifi ATM,
-                 but one never knows ;)*/
-               ret.task = IDLETASK(cpu);
-               ret.time = SECONDS(1);
-       }
+#if (EXTRA > EXTRA_OFF)
+    if (unlikely(extra_runs(inf))) {
+        /*special treatment of domains running in extra time*/
+        desched_extra_dom(now, current);
+    }
+    else 
+#endif
+    {
+        desched_edf_dom(now, current);
+    }
+ check_waitq:
+    update_queues(now, runq, waitq);
+    /*now simply pick the first domain from the runqueue, which has the
+      earliest deadline, because the list is sorted*/
+    struct sedf_edom_info *runinf, *waitinf;
+    if (!list_empty(runq)) {
+        runinf   = list_entry(runq->next,struct sedf_edom_info,list);
+        ret.task = runinf->exec_domain;
+        if (!list_empty(waitq)) {
+            waitinf  = list_entry(waitq->next,
+                                  struct sedf_edom_info,list);
+            /*rerun scheduler, when scheduled domain reaches it's
+              end of slice or the first domain from the waitqueue
+              gets ready*/
+            ret.time = MIN(now + runinf->slice - runinf->cputime,
+                           PERIOD_BEGIN(waitinf)) - now;
+        }
+        else {
+            ret.time = runinf->slice - runinf->cputime;
+        }
+        CHECK(ret.time > 0);
+        goto sched_done;
+    }
+    if (!list_empty(waitq)) {
+        waitinf  = list_entry(waitq->next,struct sedf_edom_info, list);
+        /*we could not find any suitable domain 
+          => look for domains that are aware of extratime*/
+#if (EXTRA > EXTRA_OFF)
+        ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
+                                     extraq, cpu);
+#else
+        ret.task = IDLETASK(cpu);
+        ret.time = PERIOD_BEGIN(waitinf) - now;
+#endif
+        CHECK(ret.time > 0);
+    }
+    else {
+        /*this could probably never happen, but one never knows...*/
+        /*it can... imagine a second CPU, which is pure scifi ATM,
+          but one never knows ;)*/
+        ret.task = IDLETASK(cpu);
+        ret.time = SECONDS(1);
+    }
 
-sched_done:    
-       /*TODO: Do something USEFUL when this happens and find out, why it
-       still can happen!!!*/
-       if (ret.time<0) {
-               printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
-                      ret.time);
-               ret.time = EXTRA_QUANTUM;
-       }
-       EDOM_INFO(ret.task)->sched_start_abs = now;
-       CHECK(ret.time > 0);
-       ASSERT(sedf_runnable(ret.task));
-       return ret;
+ sched_done: 
+    /*TODO: Do something USEFUL when this happens and find out, why it
+      still can happen!!!*/
+    if (ret.time<0) {
+        printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
+               ret.time);
+        ret.time = EXTRA_QUANTUM;
+    }
+    EDOM_INFO(ret.task)->sched_start_abs = now;
+    CHECK(ret.time > 0);
+    ASSERT(sedf_runnable(ret.task));
+    return ret;
 }
 
 static void sedf_sleep(struct exec_domain *d) {
-       PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
-       
-       if (is_idle_task(d->domain))
-               return;
+    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
+    if (is_idle_task(d->domain))
+        return;
 
-       EDOM_INFO(d)->status |= SEDF_ASLEEP;
-       
-       if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
-#ifdef ADV_SCHED_HISTO
-               adv_sched_hist_start(d->processor);
+    EDOM_INFO(d)->status |= SEDF_ASLEEP;
+    if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
+        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
+    }
+    else  {
+        if ( __task_on_queue(d) )
+            __del_from_queue(d);
+#if (EXTRA > EXTRA_OFF)
+        if (extraq_on(d, EXTRA_UTIL_Q)) 
+            extraq_del(d, EXTRA_UTIL_Q);
+#endif
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+        if (extraq_on(d, EXTRA_PEN_Q))
+            extraq_del(d, EXTRA_PEN_Q);
 #endif
-               cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
-       }
-       else  {
-               if ( __task_on_queue(d) )
-                       __del_from_queue(d);
-               #if (EXTRA > EXTRA_OFF)
-               if (extraq_on(d, EXTRA_UTIL_Q)) 
-                       extraq_del(d, EXTRA_UTIL_Q);
-               #endif
-               #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-               if (extraq_on(d, EXTRA_PEN_Q))
-                       extraq_del(d, EXTRA_PEN_Q);
-               #endif
-       }
+    }
 }
 
 /* This function wakes up a domain, i.e. moves them into the waitqueue
@@ -908,555 +884,554 @@ static void sedf_sleep(struct exec_domain *d) {
  */
 static inline void unblock_short_vcons
 (struct sedf_edom_info* inf, s_time_t now) {
-       inf->deadl_abs += inf->period;
-       inf->cputime = 0;
+    inf->deadl_abs += inf->period;
+    inf->cputime = 0;
 }
 
 static inline void unblock_short_cons(struct sedf_edom_info* inf, s_time_t now)
 {
-       /*treat blocked time as consumed by the domain*/
-       inf->cputime += now - inf->block_abs;   
-       if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
-               /*we don't have a reasonable amount of time in 
-                 our slice left :( => start in next period!*/
-               unblock_short_vcons(inf, now);
-       }
+    /*treat blocked time as consumed by the domain*/
+    inf->cputime += now - inf->block_abs; 
+    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+        /*we don't have a reasonable amount of time in 
+          our slice left :( => start in next period!*/
+        unblock_short_vcons(inf, now);
+    }
 #ifdef SEDF_STATS
-       else
-               inf->short_cont++;
+    else
+        inf->short_cont++;
 #endif
 }
 static inline void unblock_short_extra_support (struct sedf_edom_info* inf,
-   s_time_t now) {
-       /*this unblocking scheme tries to support the domain, by assigning it
-          a priority in extratime distribution according to the loss of time
-          in this slice due to blocking*/
-       s_time_t pen;
-       
-       /*no more realtime execution in this period!*/
-       inf->deadl_abs += inf->period;
-       if (likely(inf->block_abs)) {
-               //treat blocked time as consumed by the domain*/
-               /*inf->cputime += now - inf->block_abs;*/
-               /*penalty is time the domain would have
-                 had if it continued to run */
-               pen = (inf->slice - inf->cputime);
-               if (pen < 0) pen = 0;
-               /*accumulate all penalties over the periods*/
-               /*inf->short_block_lost_tot += pen;*/
-               /*set penalty to the current value*/
-               inf->short_block_lost_tot = pen;
-               /*not sure which one is better.. but seems to work well...*/
-               
-               if (inf->short_block_lost_tot) {
-                       inf->score[0] = (inf->period << 10) /
-                                        inf->short_block_lost_tot;
+                                                s_time_t now) {
+    /*this unblocking scheme tries to support the domain, by assigning it
+    a priority in extratime distribution according to the loss of time
+    in this slice due to blocking*/
+    s_time_t pen;
+    /*no more realtime execution in this period!*/
+    inf->deadl_abs += inf->period;
+    if (likely(inf->block_abs)) {
+        /*treat blocked time as consumed by the domain*/
+        /*inf->cputime += now - inf->block_abs;*/
+        /*penalty is time the domain would have
+          had if it continued to run */
+        pen = (inf->slice - inf->cputime);
+        if (pen < 0) pen = 0;
+        /*accumulate all penalties over the periods*/
+        /*inf->short_block_lost_tot += pen;*/
+        /*set penalty to the current value*/
+        inf->short_block_lost_tot = pen;
+        /*not sure which one is better.. but seems to work well...*/
+  
+        if (inf->short_block_lost_tot) {
+            inf->score[0] = (inf->period << 10) /
+                inf->short_block_lost_tot;
 #ifdef SEDF_STATS
-                       inf->pen_extra_blocks++;
+            inf->pen_extra_blocks++;
 #endif
-                       if (extraq_on(inf->exec_domain, EXTRA_PEN_Q))
-                               /*remove domain for possible resorting!*/
-                               extraq_del(inf->exec_domain, EXTRA_PEN_Q);
-                       else
-                               /*remember that we want to be on the penalty q
-                                 so that we can continue when we (un-)block
-                                 in penalty-extratime*/
-                               inf->status |= EXTRA_WANT_PEN_Q;
-                       
-                       /*(re-)add domain to the penalty extraq*/
-                       extraq_add_sort_update(inf->exec_domain,
-                                        EXTRA_PEN_Q, 0);
-               }
-       }
-       /*give it a fresh slice in the next period!*/
-       inf->cputime = 0;
+            if (extraq_on(inf->exec_domain, EXTRA_PEN_Q))
+                /*remove domain for possible resorting!*/
+                extraq_del(inf->exec_domain, EXTRA_PEN_Q);
+            else
+                /*remember that we want to be on the penalty q
+                  so that we can continue when we (un-)block
+                  in penalty-extratime*/
+                inf->status |= EXTRA_WANT_PEN_Q;
+   
+            /*(re-)add domain to the penalty extraq*/
+            extraq_add_sort_update(inf->exec_domain,
+                                   EXTRA_PEN_Q, 0);
+        }
+    }
+    /*give it a fresh slice in the next period!*/
+    inf->cputime = 0;
 }
 static inline void unblock_long_vcons(struct sedf_edom_info* inf, s_time_t now)
 {
-       /* align to next future period */
-       inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
-                        * inf->period;
-       inf->cputime = 0;
+    /* align to next future period */
+    inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
+        * inf->period;
+    inf->cputime = 0;
 }
 
 static inline void unblock_long_cons_a (struct sedf_edom_info* inf,
-   s_time_t now) {
-       /*treat the time the domain was blocked in the
-         CURRENT period as consumed by the domain*/
-       inf->cputime = (now - inf->deadl_abs) % inf->period;    
-       if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
-               /*we don't have a reasonable amount of time in our slice
-                 left :( => start in next period!*/
-               unblock_long_vcons(inf, now);
-       }
+                                        s_time_t now) {
+    /*treat the time the domain was blocked in the
+      CURRENT period as consumed by the domain*/
+    inf->cputime = (now - inf->deadl_abs) % inf->period; 
+    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+        /*we don't have a reasonable amount of time in our slice
+          left :( => start in next period!*/
+        unblock_long_vcons(inf, now);
+    }
 }
 static inline void unblock_long_cons_b(struct sedf_edom_info* inf,s_time_t now) {
-       /*Conservative 2b*/
-       /*Treat the unblocking time as a start of a new period */
-       inf->deadl_abs = now + inf->period;
-       inf->cputime = 0;
+    /*Conservative 2b*/
+    /*Treat the unblocking time as a start of a new period */
+    inf->deadl_abs = now + inf->period;
+    inf->cputime = 0;
 }
 static inline void unblock_long_cons_c(struct sedf_edom_info* inf,s_time_t now) {
-       if (likely(inf->latency)) {
-               /*scale the slice and period accordingly to the latency hint*/
-               /*reduce period temporarily to the latency hint*/
-               inf->period = inf->latency;
-               /*this results in max. 4s slice/period length*/
-               ASSERT((inf->period < ULONG_MAX)
-                   && (inf->slice_orig < ULONG_MAX));
-               /*scale slice accordingly, so that utilisation stays the same*/
-               inf->slice = (inf->period * inf->slice_orig)
-                           / inf->period_orig;
-               inf->deadl_abs = now + inf->period;
-               inf->cputime = 0;
-       }       
-       else {
-               /*we don't have a latency hint.. use some other technique*/
-               unblock_long_cons_b(inf, now);
-       }
+    if (likely(inf->latency)) {
+        /*scale the slice and period accordingly to the latency hint*/
+        /*reduce period temporarily to the latency hint*/
+        inf->period = inf->latency;
+        /*this results in max. 4s slice/period length*/
+        ASSERT((inf->period < ULONG_MAX)
+               && (inf->slice_orig < ULONG_MAX));
+        /*scale slice accordingly, so that utilisation stays the same*/
+        inf->slice = (inf->period * inf->slice_orig)
+            / inf->period_orig;
+        inf->deadl_abs = now + inf->period;
+        inf->cputime = 0;
+    } 
+    else {
+        /*we don't have a latency hint.. use some other technique*/
+        unblock_long_cons_b(inf, now);
+    }
 }
 /*a new idea of dealing with short blocks: burst period scaling*/
 static inline void unblock_short_burst(struct sedf_edom_info* inf, s_time_t now)
 {
-       /*treat blocked time as consumed by the domain*/
-       inf->cputime += now - inf->block_abs;
-       
-       if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
-               /*if we can still use some time in the current slice
-                 then use it!*/
+    /*treat blocked time as consumed by the domain*/
+    inf->cputime += now - inf->block_abs;
+    if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
+        /*if we can still use some time in the current slice
+          then use it!*/
 #ifdef SEDF_STATS
-               /*we let the domain run in the current period*/
-               inf->short_cont++;
+        /*we let the domain run in the current period*/
+        inf->short_cont++;
 #endif
-       }
-       else {
-               /*we don't have a reasonable amount of time in
-                 our slice left => switch to burst mode*/
-               if (likely(inf->unblock_abs)) {
-                       /*set the period-length to the current blocking
-                         interval, possible enhancements: average over last
-                         blocking intervals, user-specified minimum,...*/
-                       inf->period = now - inf->unblock_abs;
-                       /*check for overflow on multiplication*/
-                       ASSERT((inf->period < ULONG_MAX) 
-                           && (inf->slice_orig < ULONG_MAX));
-                       /*scale slice accordingly, so that utilisation
-                         stays the same*/
-                       inf->slice = (inf->period * inf->slice_orig)
-                                   / inf->period_orig;
-                       /*set new (shorter) deadline*/
-                       inf->deadl_abs += inf->period;
-               }
-               else {
-                       /*in case we haven't unblocked before
-                         start in next period!*/
-                       inf->cputime=0;
-                       inf->deadl_abs += inf->period;
-               }
-       }
-       inf->unblock_abs = now;
+    }
+    else {
+        /*we don't have a reasonable amount of time in
+          our slice left => switch to burst mode*/
+        if (likely(inf->unblock_abs)) {
+            /*set the period-length to the current blocking
+              interval, possible enhancements: average over last
+              blocking intervals, user-specified minimum,...*/
+            inf->period = now - inf->unblock_abs;
+            /*check for overflow on multiplication*/
+            ASSERT((inf->period < ULONG_MAX) 
+                   && (inf->slice_orig < ULONG_MAX));
+            /*scale slice accordingly, so that utilisation
+              stays the same*/
+            inf->slice = (inf->period * inf->slice_orig)
+                / inf->period_orig;
+            /*set new (shorter) deadline*/
+            inf->deadl_abs += inf->period;
+        }
+        else {
+            /*in case we haven't unblocked before
+              start in next period!*/
+            inf->cputime=0;
+            inf->deadl_abs += inf->period;
+        }
+    }
+    inf->unblock_abs = now;
 }
 static inline void unblock_long_burst(struct sedf_edom_info* inf, s_time_t now) {
-       if (unlikely(inf->latency && (inf->period > inf->latency))) {
-               /*scale the slice and period accordingly to the latency hint*/
-               inf->period = inf->latency;
-               /*check for overflows on multiplication*/
-               ASSERT((inf->period < ULONG_MAX)
-                   && (inf->slice_orig < ULONG_MAX));
-               /*scale slice accordingly, so that utilisation stays the same*/
-               inf->slice = (inf->period * inf->slice_orig)
-                           / inf->period_orig;
-               inf->deadl_abs = now + inf->period;
-               inf->cputime = 0;
-       }
-       else {
-               /*we don't have a latency hint.. or we are currently in 
-                "burst mode": use some other technique
-                 NB: this should be in fact the normal way of operation,
-                 when we are in sync with the device!*/
-               unblock_long_cons_b(inf, now);
-       }
-       inf->unblock_abs = now;
+    if (unlikely(inf->latency && (inf->period > inf->latency))) {
+        /*scale the slice and period accordingly to the latency hint*/
+        inf->period = inf->latency;
+        /*check for overflows on multiplication*/
+        ASSERT((inf->period < ULONG_MAX)
+               && (inf->slice_orig < ULONG_MAX));
+        /*scale slice accordingly, so that utilisation stays the same*/
+        inf->slice = (inf->period * inf->slice_orig)
+            / inf->period_orig;
+        inf->deadl_abs = now + inf->period;
+        inf->cputime = 0;
+    }
+    else {
+        /*we don't have a latency hint.. or we are currently in 
+          "burst mode": use some other technique
+          NB: this should be in fact the normal way of operation,
+          when we are in sync with the device!*/
+        unblock_long_cons_b(inf, now);
+    }
+    inf->unblock_abs = now;
 }
 
-#define DOMAIN_EDF             1
-#define DOMAIN_EXTRA_PEN       2
-#define DOMAIN_EXTRA_UTIL      3
-#define DOMAIN_IDLE            4
+#define DOMAIN_EDF   1
+#define DOMAIN_EXTRA_PEN  2
+#define DOMAIN_EXTRA_UTIL  3
+#define DOMAIN_IDLE   4
 static inline int get_run_type(struct exec_domain* d) {
-       struct sedf_edom_info* inf = EDOM_INFO(d);
-       if (is_idle_task(d->domain))
-               return DOMAIN_IDLE;
-       if (inf->status & EXTRA_RUN_PEN)
-               return DOMAIN_EXTRA_PEN;
-       if (inf->status & EXTRA_RUN_UTIL)
-               return DOMAIN_EXTRA_UTIL;
-       return DOMAIN_EDF;
+    struct sedf_edom_info* inf = EDOM_INFO(d);
+    if (is_idle_task(d->domain))
+        return DOMAIN_IDLE;
+    if (inf->status & EXTRA_RUN_PEN)
+        return DOMAIN_EXTRA_PEN;
+    if (inf->status & EXTRA_RUN_UTIL)
+        return DOMAIN_EXTRA_UTIL;
+    return DOMAIN_EDF;
 }
 /*Compares two domains in the relation of whether the one is allowed to
   interrupt the others execution.
   It returns true (!=0) if a switch to the other domain is good.
   Current Priority scheme is as follows:
-       EDF > L0 (penalty based) extra-time > 
-       L1 (utilization) extra-time > idle-domain
+   EDF > L0 (penalty based) extra-time > 
+   L1 (utilization) extra-time > idle-domain
   In the same class priorities are assigned as following:
-       EDF: early deadline > late deadline
-       L0 extra-time: lower score > higher score*/
+   EDF: early deadline > late deadline
+   L0 extra-time: lower score > higher score*/
 static inline int should_switch(struct exec_domain* cur,
-   struct exec_domain* other, s_time_t now) {
-       struct sedf_edom_info *cur_inf, *other_inf;
-       cur_inf   = EDOM_INFO(cur);
-       other_inf = EDOM_INFO(other);
-       
      /*check whether we need to make an earlier sched-decision*/
-       if ((PERIOD_BEGIN(other_inf) < 
-            schedule_data[other->processor].s_timer.expires))
-               return 1;
-       /*no timing-based switches need to be taken into account here*/
-       switch (get_run_type(cur)) {
-               case DOMAIN_EDF:
-                       /* do not interrupt a running EDF domain */ 
-                       return 0;
-               case DOMAIN_EXTRA_PEN:
-                       /*check whether we also want 
-                         the L0 ex-q with lower score*/
-                       if ((other_inf->status & EXTRA_WANT_PEN_Q)
-                       &&  (other_inf->score[EXTRA_PEN_Q] < 
-                            cur_inf->score[EXTRA_PEN_Q]))
-                               return 1;
-                       else    return 0;
-               case DOMAIN_EXTRA_UTIL:
-                       /*check whether we want the L0 extraq, don't
-                         switch if both domains want L1 extraq */
-                       if (other_inf->status & EXTRA_WANT_PEN_Q)
-                               return 1;
-                       else    return 0;
-               case DOMAIN_IDLE:
-                       return 1;
-       }
-       return 1;
+                                struct exec_domain* other, s_time_t now) {
+    struct sedf_edom_info *cur_inf, *other_inf;
+    cur_inf   = EDOM_INFO(cur);
+    other_inf = EDOM_INFO(other);
+ /*check whether we need to make an earlier sched-decision*/
+    if ((PERIOD_BEGIN(other_inf) < 
+         schedule_data[other->processor].s_timer.expires))
+        return 1;
+    /*no timing-based switches need to be taken into account here*/
+    switch (get_run_type(cur)) {
+    case DOMAIN_EDF:
+        /* do not interrupt a running EDF domain */ 
+        return 0;
+    case DOMAIN_EXTRA_PEN:
+        /*check whether we also want 
+          the L0 ex-q with lower score*/
+        if ((other_inf->status & EXTRA_WANT_PEN_Q)
+            &&  (other_inf->score[EXTRA_PEN_Q] < 
+                 cur_inf->score[EXTRA_PEN_Q]))
+            return 1;
+        else return 0;
+    case DOMAIN_EXTRA_UTIL:
+        /*check whether we want the L0 extraq, don't
+          switch if both domains want L1 extraq */
+        if (other_inf->status & EXTRA_WANT_PEN_Q)
+            return 1;
+        else return 0;
+    case DOMAIN_IDLE:
+        return 1;
+    }
+    return 1;
 }
 void sedf_wake(struct exec_domain *d) {
-       s_time_t              now = NOW();
-       struct sedf_edom_info* inf = EDOM_INFO(d);
-       
-       PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
-       
-       if (unlikely(is_idle_task(d->domain)))
-               return;
-                       
-       if ( unlikely(__task_on_queue(d)) ) {
-               PRINT(3,"\tdomain %i.%i is already in some queue\n",
-                     d->domain->domain_id, d->vcpu_id);
-               return;
-       }
-       ASSERT(!sedf_runnable(d));
-       inf->status &= ~SEDF_ASLEEP;
-       ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
-       ASSERT(!extraq_on(d, EXTRA_PEN_Q));
-       
-       if (unlikely(inf->deadl_abs == 0))
-               /*initial setup of the deadline*/
-               inf->deadl_abs = now + inf->slice;
-               
-       PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
-               "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
-                inf->period, now);
-#ifdef SEDF_STATS      
-       inf->block_tot++;
+    s_time_t              now = NOW();
+    struct sedf_edom_info* inf = EDOM_INFO(d);
+    PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
+    if (unlikely(is_idle_task(d->domain)))
+        return;
+   
+    if ( unlikely(__task_on_queue(d)) ) {
+        PRINT(3,"\tdomain %i.%i is already in some queue\n",
+              d->domain->domain_id, d->vcpu_id);
+        return;
+    }
+    ASSERT(!sedf_runnable(d));
+    inf->status &= ~SEDF_ASLEEP;
+    ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
+    ASSERT(!extraq_on(d, EXTRA_PEN_Q));
+    if (unlikely(inf->deadl_abs == 0))
+        /*initial setup of the deadline*/
+        inf->deadl_abs = now + inf->slice;
+  
+    PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
+          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
+          inf->period, now);
+#ifdef SEDF_STATS 
+    inf->block_tot++;
+#endif
+    if (unlikely(now < PERIOD_BEGIN(inf))) {
+        PRINT(4,"extratime unblock\n");
+        /* unblocking in extra-time! */
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+        if (inf->status & EXTRA_WANT_PEN_Q) {
+            /*we have a domain that wants compensation
+              for block penalty and did just block in
+              its compensation time. Give it another
+              chance!*/
+            extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
+        }
 #endif
-       if (unlikely(now < PERIOD_BEGIN(inf))) {
-               PRINT(4,"extratime unblock\n");
-               /* unblocking in extra-time! */
-               #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-               if (inf->status & EXTRA_WANT_PEN_Q) {
-                       /*we have a domain that wants compensation
-                         for block penalty and did just block in
-                         its compensation time. Give it another
-                         chance!*/
-                       extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
-               }
-               #endif
-               extraq_check_add_unblocked(d, 0);
-       }               
-       else {          
-               if (now < inf->deadl_abs) {
-                       PRINT(4,"short unblocking\n");
-                       /*short blocking*/
+        extraq_check_add_unblocked(d, 0);
+    }  
+    else {  
+        if (now < inf->deadl_abs) {
+            PRINT(4,"short unblocking\n");
+            /*short blocking*/
 #ifdef SEDF_STATS
-                       inf->short_block_tot++;
+            inf->short_block_tot++;
+#endif
+#if (UNBLOCK <= UNBLOCK_ATROPOS)
+            unblock_short_vcons(inf, now);
+#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
+            unblock_short_cons(inf, now);
+#elif (UNBLOCK == UNBLOCK_BURST)
+            unblock_short_burst(inf, now);
+#elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
+            unblock_short_extra_support(inf, now);
 #endif
-                       #if (UNBLOCK <= UNBLOCK_ATROPOS)
-                       unblock_short_vcons(inf, now);
-                       #elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
-                       unblock_short_cons(inf, now);
-                       #elif (UNBLOCK == UNBLOCK_BURST)
-                       unblock_short_burst(inf, now);
-                       #elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
-                       unblock_short_extra_support(inf, now);
-                       #endif
 
-                       extraq_check_add_unblocked(d, 1);
-               }
-               else {
-                       PRINT(4,"long unblocking\n");
-                       /*long unblocking*/
+            extraq_check_add_unblocked(d, 1);
+        }
+        else {
+            PRINT(4,"long unblocking\n");
+            /*long unblocking*/
 #ifdef SEDF_STATS
-                       inf->long_block_tot++;
+            inf->long_block_tot++;
+#endif
+#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
+            unblock_long_vcons(inf, now);
+#elif (UNBLOCK == UNBLOCK_EDF \
+       || UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
+            unblock_long_cons_b(inf, now);
+#elif (UNBLOCK == UNBLOCK_ATROPOS)
+            unblock_long_cons_c(inf, now);
+#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
+            unblock_long_cons_b(inf, now);
+            /*unblock_short_cons_c(inf, now);*/
+#elif (UNBLOCK == UNBLOCK_BURST)
+            unblock_long_burst(inf, now);
 #endif
-                       #if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
-                       unblock_long_vcons(inf, now);
-                       #elif (UNBLOCK == UNBLOCK_EDF \
-                           || UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
-                       unblock_long_cons_b(inf, now);
-                       #elif (UNBLOCK == UNBLOCK_ATROPOS)
-                       unblock_long_cons_c(inf, now);
-                       #elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
-                       unblock_long_cons_b(inf, now);
-                       /*unblock_short_cons_c(inf, now);*/
-                       #elif (UNBLOCK == UNBLOCK_BURST)
-                       unblock_long_burst(inf, now);
-                       #endif
 
-                       extraq_check_add_unblocked(d, 1);
-               }
-       }
-       PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
-               "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
-               inf->period, now);
-       if (PERIOD_BEGIN(inf) > now) {
-               __add_to_waitqueue_sort(d);
-               PRINT(3,"added to waitq\n");
-       }
-       else {
-               __add_to_runqueue_sort(d);
-               PRINT(3,"added to runq\n");
-       }
-       
+            extraq_check_add_unblocked(d, 1);
+        }
+    }
+    PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
+          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
+          inf->period, now);
+    if (PERIOD_BEGIN(inf) > now) {
+        __add_to_waitqueue_sort(d);
+        PRINT(3,"added to waitq\n");
+    }
+    else {
+        __add_to_runqueue_sort(d);
+        PRINT(3,"added to runq\n");
+    }
 #ifdef SEDF_STATS
-       /*do some statistics here...*/
-       if (inf->block_abs != 0) {
-               inf->block_time_tot += now - inf->block_abs;
-               inf->penalty_time_tot +=
-                  PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
-       }
-#endif
-       /*sanity check: make sure each extra-aware domain IS on the util-q!*/
-       ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
-       ASSERT(__task_on_queue(d));
-       /*check whether the awakened task needs to invoke the do_schedule
-         routine. Try to avoid unnecessary runs but:
-         Save approximation: Always switch to scheduler!*/
-       if (should_switch(schedule_data[d->processor].curr, d, now)){
-#ifdef ADV_SCHED_HISTO
-               adv_sched_hist_start(d->processor);
+    /*do some statistics here...*/
+    if (inf->block_abs != 0) {
+        inf->block_time_tot += now - inf->block_abs;
+        inf->penalty_time_tot +=
+            PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
+    }
 #endif
-               cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
-       }
+    /*sanity check: make sure each extra-aware domain IS on the util-q!*/
+    ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
+    ASSERT(__task_on_queue(d));
+    /*check whether the awakened task needs to invoke the do_schedule
+      routine. Try to avoid unnecessary runs but:
+      Save approximation: Always switch to scheduler!*/
+    if (should_switch(schedule_data[d->processor].curr, d, now))
+        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 }
 
 /*Print a lot of use-{full, less} information about a domains in the system*/
 static void sedf_dump_domain(struct exec_domain *d) {
-       printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
-               test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
-       printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
-         EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
-         EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
-        (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
-         EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
-       if (d->cpu_time !=0)
-               printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
-                                / d->cpu_time);
+    printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
+           test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
+    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
+           EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
+           EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
+           (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
+           EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
+    if (d->cpu_time !=0)
+        printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
+               / d->cpu_time);
 #ifdef SEDF_STATS
-       if (EDOM_INFO(d)->block_time_tot!=0)
-               printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
-                                    EDOM_INFO(d)->block_time_tot);
-       if (EDOM_INFO(d)->block_tot!=0)
-               printf("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
-                      "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
-                   EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
-                  (EDOM_INFO(d)->short_block_tot * 100) 
-                 / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
-                  (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
-                   EDOM_INFO(d)->pen_extra_blocks,
-                   EDOM_INFO(d)->pen_extra_slices,
-                   EDOM_INFO(d)->long_block_tot,
-                  (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
-                  (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
-                  (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
+    if (EDOM_INFO(d)->block_time_tot!=0)
+        printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
+               EDOM_INFO(d)->block_time_tot);
+    if (EDOM_INFO(d)->block_tot!=0)
+        printf("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
+               "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
+               EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
+               (EDOM_INFO(d)->short_block_tot * 100) 
+               / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
+               (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
+               EDOM_INFO(d)->pen_extra_blocks,
+               EDOM_INFO(d)->pen_extra_slices,
+               EDOM_INFO(d)->long_block_tot,
+               (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
+               (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
+               (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
 #endif
-       printf("\n");
+    printf("\n");
 }
 
 /*dumps all domains on hte specified cpu*/
 static void sedf_dump_cpu_state(int i)
 {
-       struct list_head      *list, *queue, *tmp;
-       struct sedf_edom_info *d_inf;
-       struct domain         *d;
-       struct exec_domain    *ed;
-       int loop = 0;
-       
-       printk("now=%"PRIu64"\n",NOW());
-       queue = RUNQ(i);
-       printk("RUNQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
-               (unsigned long) queue->next, (unsigned long) queue->prev);
-       list_for_each_safe ( list, tmp, queue ) {
-               printk("%3d: ",loop++);
-               d_inf = list_entry(list, struct sedf_edom_info, list);
-               sedf_dump_domain(d_inf->exec_domain);
-       }
-       
-       queue = WAITQ(i); loop = 0;
-       printk("\nWAITQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
-               (unsigned long) queue->next, (unsigned long) queue->prev);
-       list_for_each_safe ( list, tmp, queue ) {
-               printk("%3d: ",loop++);
-               d_inf = list_entry(list, struct sedf_edom_info, list);
-               sedf_dump_domain(d_inf->exec_domain);
-       }
-       
-       queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
-       printk("\nEXTRAQ (penalty) rq %lx   n: %lx, p: %lx\n",
-              (unsigned long)queue, (unsigned long) queue->next,
-              (unsigned long) queue->prev);
-       list_for_each_safe ( list, tmp, queue ) {
-               d_inf = list_entry(list, struct sedf_edom_info,
-                                  extralist[EXTRA_PEN_Q]);
-               printk("%3d: ",loop++);
-               sedf_dump_domain(d_inf->exec_domain);
-       }
-       
-       queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
-       printk("\nEXTRAQ (utilization) rq %lx   n: %lx, p: %lx\n",
-              (unsigned long)queue, (unsigned long) queue->next,
-              (unsigned long) queue->prev);
-       list_for_each_safe ( list, tmp, queue ) {
-               d_inf = list_entry(list, struct sedf_edom_info,
-                                  extralist[EXTRA_UTIL_Q]);
-               printk("%3d: ",loop++);
-               sedf_dump_domain(d_inf->exec_domain);
-       }
-       
-       loop = 0;
-       printk("\nnot on Q\n");
-       for_each_domain(d)
-               for_each_exec_domain(d, ed)
-               {
-                       if (!__task_on_queue(ed) && (ed->processor == i)) {
-                               printk("%3d: ",loop++);
-                               sedf_dump_domain(ed);
-                       }
-               }
+    struct list_head      *list, *queue, *tmp;
+    struct sedf_edom_info *d_inf;
+    struct domain         *d;
+    struct exec_domain    *ed;
+    int loop = 0;
+    printk("now=%"PRIu64"\n",NOW());
+    queue = RUNQ(i);
+    printk("RUNQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
+           (unsigned long) queue->next, (unsigned long) queue->prev);
+    list_for_each_safe ( list, tmp, queue ) {
+        printk("%3d: ",loop++);
+        d_inf = list_entry(list, struct sedf_edom_info, list);
+        sedf_dump_domain(d_inf->exec_domain);
+    }
+    queue = WAITQ(i); loop = 0;
+    printk("\nWAITQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
+           (unsigned long) queue->next, (unsigned long) queue->prev);
+    list_for_each_safe ( list, tmp, queue ) {
+        printk("%3d: ",loop++);
+        d_inf = list_entry(list, struct sedf_edom_info, list);
+        sedf_dump_domain(d_inf->exec_domain);
+    }
+    queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
+    printk("\nEXTRAQ (penalty) rq %lx   n: %lx, p: %lx\n",
+           (unsigned long)queue, (unsigned long) queue->next,
+           (unsigned long) queue->prev);
+    list_for_each_safe ( list, tmp, queue ) {
+        d_inf = list_entry(list, struct sedf_edom_info,
+                           extralist[EXTRA_PEN_Q]);
+        printk("%3d: ",loop++);
+        sedf_dump_domain(d_inf->exec_domain);
+    }
+    queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
+    printk("\nEXTRAQ (utilization) rq %lx   n: %lx, p: %lx\n",
+           (unsigned long)queue, (unsigned long) queue->next,
+           (unsigned long) queue->prev);
+    list_for_each_safe ( list, tmp, queue ) {
+        d_inf = list_entry(list, struct sedf_edom_info,
+                           extralist[EXTRA_UTIL_Q]);
+        printk("%3d: ",loop++);
+        sedf_dump_domain(d_inf->exec_domain);
+    }
+    loop = 0;
+    printk("\nnot on Q\n");
+    for_each_domain(d)
+        for_each_exec_domain(d, ed)
+    {
+        if (!__task_on_queue(ed) && (ed->processor == i)) {
+            printk("%3d: ",loop++);
+            sedf_dump_domain(ed);
+        }
+    }
 }
 /*Adjusts periods and slices of the domains accordingly to their weights*/
 static inline int sedf_adjust_weights(struct sched_adjdom_cmd *cmd) {
-       struct exec_domain *p;
-       struct domain      *d;
-       int                 sumw[NR_CPUS];
-       s_time_t            sumt[NR_CPUS];
-       int                 cpu;
-       
-       for (cpu=0; cpu < NR_CPUS; cpu++) {
-               sumw[cpu] = 0;
-               sumt[cpu] = 0;
-       }
-       /*sum up all weights*/
-       for_each_domain(d)
-         for_each_exec_domain(d, p) {
-               if (EDOM_INFO(p)->weight)
-                       sumw[p->processor] += EDOM_INFO(p)->weight;
-               else {
-                       /*don't modify domains who don't have a weight, but sum
-                         up the time they need, projected to a WEIGHT_PERIOD,
-                         so that this time is not given to the weight-driven
-                         domains*/
-                       /*check for overflows*/
-                       ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
-                           && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
-                       sumt[p->processor] += (WEIGHT_PERIOD *
-                           EDOM_INFO(p)->slice_orig) / EDOM_INFO(p)->period_orig;
-               }
-       }
-       /*adjust all slices (and periods) to the new weight*/
-       for_each_domain(d) 
-         for_each_exec_domain(d, p) {
-               if (EDOM_INFO(p)->weight) {
-                       EDOM_INFO(p)->period_orig = 
-                            EDOM_INFO(p)->period = WEIGHT_PERIOD;
-                       EDOM_INFO(p)->slice_orig  =
-                             EDOM_INFO(p)->slice = (EDOM_INFO(p)->weight *
-                             (WEIGHT_PERIOD -WEIGHT_SAFETY -
-                              sumt[p->processor])) / sumw[p->processor];
-               }
-       }
-       return 0;
+    struct exec_domain *p;
+    struct domain      *d;
+    int                 sumw[NR_CPUS];
+    s_time_t            sumt[NR_CPUS];
+    int                 cpu;
+    for (cpu=0; cpu < NR_CPUS; cpu++) {
+        sumw[cpu] = 0;
+        sumt[cpu] = 0;
+    }
+    /*sum up all weights*/
+    for_each_domain(d)
+        for_each_exec_domain(d, p) {
+        if (EDOM_INFO(p)->weight)
+            sumw[p->processor] += EDOM_INFO(p)->weight;
+        else {
+            /*don't modify domains that don't have a weight, but sum
+              up the time they need, projected to a WEIGHT_PERIOD,
+              so that this time is not given to the weight-driven
+              domains*/
+            /*check for overflows*/
+            ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
+                   && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
+            sumt[p->processor] += 
+                (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) / 
+                EDOM_INFO(p)->period_orig;
+        }
+    }
+    /*adjust all slices (and periods) to the new weight*/
+    for_each_domain(d) 
+        for_each_exec_domain(d, p) {
+        if (EDOM_INFO(p)->weight) {
+            EDOM_INFO(p)->period_orig = 
+                EDOM_INFO(p)->period  = WEIGHT_PERIOD;
+            EDOM_INFO(p)->slice_orig  =
+                EDOM_INFO(p)->slice   = 
+                (EDOM_INFO(p)->weight *
+                 (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) / 
+                sumw[p->processor];
+        }
+    }
+    return 0;
 }
 
 /* set or fetch domain scheduling parameters */
 static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd) {
-       struct exec_domain *ed;
+    struct exec_domain *ed;
 
-       PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
-               "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
-               p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
-               cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
-       if ( cmd->direction == SCHED_INFO_PUT )
-       {
-               /*check for sane parameters*/
-               if (!cmd->u.sedf.period && !cmd->u.sedf.weight)
-                       return -EINVAL;
-               if (cmd->u.sedf.weight) {
-                       if ((cmd->u.sedf.extratime & EXTRA_AWARE) &&
-                           (! cmd->u.sedf.period)) {
-                       /*weight driven domains with xtime ONLY!*/
-                               for_each_exec_domain(p, ed) {
-                                 EDOM_INFO(ed)->extraweight = cmd->u.sedf.weight;
-                                 EDOM_INFO(ed)->weight = 0;
-                                 EDOM_INFO(ed)->slice = 0;
-                                 EDOM_INFO(ed)->period = WEIGHT_PERIOD;
-                               }
-                       } else {
-                       /*weight driven domains with real-time execution*/
-                               for_each_exec_domain(p, ed)
-                                 EDOM_INFO(ed)->weight = cmd->u.sedf.weight;
-                       }
-               }
-               else {
-                       /*time driven domains*/
-                       for_each_exec_domain(p, ed) {
-                               /* sanity checking! */
-                               if(cmd->u.sedf.slice > cmd->u.sedf.period )
-                                       return -EINVAL;
-                               EDOM_INFO(ed)->weight = 0;
-                               EDOM_INFO(ed)->extraweight = 0;
-                               EDOM_INFO(ed)->period_orig = 
-                               EDOM_INFO(ed)->period   = cmd->u.sedf.period;
-                               EDOM_INFO(ed)->slice_orig  = 
-                               EDOM_INFO(ed)->slice    = cmd->u.sedf.slice;
-                       }
-               }
-               if (sedf_adjust_weights(cmd))
-                       return -EINVAL;
-                       
-               for_each_exec_domain(p, ed) {
-                       EDOM_INFO(ed)->status  = (EDOM_INFO(ed)->status &
-                         ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
-                       EDOM_INFO(ed)->latency = cmd->u.sedf.latency;
-                       extraq_check(ed);
-               }
-       }
-       else if ( cmd->direction == SCHED_INFO_GET )
-       {
-               cmd->u.sedf.period    = EDOM_INFO(p->exec_domain[0])->period;
-               cmd->u.sedf.slice     = EDOM_INFO(p->exec_domain[0])->slice;
-               cmd->u.sedf.extratime = EDOM_INFO(p->exec_domain[0])->status
-                                           & EXTRA_AWARE;
-               cmd->u.sedf.latency   = EDOM_INFO(p->exec_domain[0])->latency;
-               cmd->u.sedf.weight    = EDOM_INFO(p->exec_domain[0])->weight;
-       }
-       PRINT(2,"sedf_adjdom_finished\n");
-       return 0;
+    PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
+          "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
+          p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
+          cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
+    if ( cmd->direction == SCHED_INFO_PUT )
+    {
+        /*check for sane parameters*/
+        if (!cmd->u.sedf.period && !cmd->u.sedf.weight)
+            return -EINVAL;
+        if (cmd->u.sedf.weight) {
+            if ((cmd->u.sedf.extratime & EXTRA_AWARE) &&
+                (! cmd->u.sedf.period)) {
+                /*weight driven domains with xtime ONLY!*/
+                for_each_exec_domain(p, ed) {
+                    EDOM_INFO(ed)->extraweight = cmd->u.sedf.weight;
+                    EDOM_INFO(ed)->weight = 0;
+                    EDOM_INFO(ed)->slice = 0;
+                    EDOM_INFO(ed)->period = WEIGHT_PERIOD;
+                }
+            } else {
+                /*weight driven domains with real-time execution*/
+                for_each_exec_domain(p, ed)
+                    EDOM_INFO(ed)->weight = cmd->u.sedf.weight;
+            }
+        }
+        else {
+            /*time driven domains*/
+            for_each_exec_domain(p, ed) {
+                /* sanity checking! */
+                if(cmd->u.sedf.slice > cmd->u.sedf.period )
+                    return -EINVAL;
+                EDOM_INFO(ed)->weight = 0;
+                EDOM_INFO(ed)->extraweight = 0;
+                EDOM_INFO(ed)->period_orig = 
+                    EDOM_INFO(ed)->period   = cmd->u.sedf.period;
+                EDOM_INFO(ed)->slice_orig  = 
+                    EDOM_INFO(ed)->slice    = cmd->u.sedf.slice;
+            }
+        }
+        if (sedf_adjust_weights(cmd))
+            return -EINVAL;
+   
+        for_each_exec_domain(p, ed) {
+            EDOM_INFO(ed)->status  = 
+                (EDOM_INFO(ed)->status &
+                 ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
+            EDOM_INFO(ed)->latency = cmd->u.sedf.latency;
+            extraq_check(ed);
+        }
+    }
+    else if ( cmd->direction == SCHED_INFO_GET )
+    {
+        cmd->u.sedf.period    = EDOM_INFO(p->exec_domain[0])->period;
+        cmd->u.sedf.slice     = EDOM_INFO(p->exec_domain[0])->slice;
+        cmd->u.sedf.extratime = EDOM_INFO(p->exec_domain[0])->status
+            & EXTRA_AWARE;
+        cmd->u.sedf.latency   = EDOM_INFO(p->exec_domain[0])->latency;
+        cmd->u.sedf.weight    = EDOM_INFO(p->exec_domain[0])->weight;
+    }
+    PRINT(2,"sedf_adjdom_finished\n");
+    return 0;
 }
 
 struct scheduler sched_sedf_def = {
@@ -1464,11 +1439,9 @@ struct scheduler sched_sedf_def = {
     .opt_name = "sedf",
     .sched_id = SCHED_SEDF,
     
-    .init_idle_task = sedf_init_idle_task,
     .alloc_task     = sedf_alloc_task,
     .add_task       = sedf_add_task,
     .free_task      = sedf_free_task,
-    .init_scheduler = sedf_init_scheduler,
     .do_schedule    = sedf_do_schedule,
     .dump_cpu_state = sedf_dump_cpu_state,
     .sleep          = sedf_sleep,
index d7ba0a078cbed4237eadf995ca58c485bb0a62f8..ec974657e27949287f75c90e5a0d0f1624067345 100644 (file)
 static char opt_sched[10] = "bvt";
 string_param("sched", opt_sched);
 
-/*#define WAKE_HISTO*/
-/*#define BLOCKTIME_HISTO*/
-/*#define ADV_SCHED_HISTO*/
-//#include <xen/adv_sched_hist.h>
-
 #if defined(WAKE_HISTO)
 #define BUCKETS 31
 #elif defined(BLOCKTIME_HISTO)
@@ -93,8 +88,8 @@ void free_domain_struct(struct domain *d)
     xfree(d);
 }
 
-struct exec_domain *alloc_exec_domain_struct(struct domain *d,
-                                             unsigned long vcpu)
+struct exec_domain *alloc_exec_domain_struct(
+    struct domain *d, unsigned long vcpu)
 {
     struct exec_domain *ed, *edc;
 
@@ -126,10 +121,10 @@ struct exec_domain *alloc_exec_domain_struct(struct domain *d,
         edc->next_in_list = ed;
 
         if (test_bit(_VCPUF_cpu_pinned, &edc->vcpu_flags)) {
-            ed->processor = (edc->processor + 1) % smp_num_cpus;
+            ed->processor = (edc->processor + 1) % num_online_cpus();
             set_bit(_VCPUF_cpu_pinned, &ed->vcpu_flags);
         } else {
-            ed->processor = (edc->processor + 1) % smp_num_cpus;  /* XXX */
+            ed->processor = (edc->processor + 1) % num_online_cpus();
         }
     }
 
@@ -168,20 +163,22 @@ void sched_add_domain(struct exec_domain *ed)
 {
     struct domain *d = ed->domain;
 
-    /* Must be unpaused by control software to start execution. */
-    set_bit(_VCPUF_ctrl_pause, &ed->vcpu_flags);
+    /* Initialise the per-domain timer. */
+    init_ac_timer(&ed->timer);
+    ed->timer.cpu      = ed->processor;
+    ed->timer.data     = (unsigned long)ed;
+    ed->timer.function = &dom_timer_fn;
 
-    if ( d->domain_id != IDLE_DOMAIN_ID )
+    if ( is_idle_task(d) )
     {
-        /* Initialise the per-domain timer. */
-        init_ac_timer(&ed->timer);
-        ed->timer.cpu      = ed->processor;
-        ed->timer.data     = (unsigned long)ed;
-        ed->timer.function = &dom_timer_fn;
+        schedule_data[ed->processor].curr = ed;
+        schedule_data[ed->processor].idle = ed;
+        set_bit(_VCPUF_running, &ed->vcpu_flags);
     }
     else
     {
-        schedule_data[ed->processor].idle = ed;
+        /* Must be unpaused by control software to start execution. */
+        set_bit(_VCPUF_ctrl_pause, &ed->vcpu_flags);
     }
 
     SCHED_OP(add_task, ed);
@@ -195,12 +192,6 @@ void sched_rem_domain(struct exec_domain *ed)
     TRACE_2D(TRC_SCHED_DOM_REM, ed->domain->domain_id, ed->vcpu_id);
 }
 
-void init_idle_task(void)
-{
-    if ( SCHED_OP(init_idle_task, current) < 0 )
-        BUG();
-}
-
 void domain_sleep(struct exec_domain *ed)
 {
     unsigned long flags;
@@ -240,10 +231,6 @@ long do_block(void)
 {
     struct exec_domain *ed = current;
 
-#ifdef ADV_SCHED_HISTO
-    adv_sched_hist_start(current->processor);
-#endif
-
     ed->vcpu_info->evtchn_upcall_mask = 0;
     set_bit(_VCPUF_blocked, &ed->vcpu_flags);
 
@@ -264,10 +251,6 @@ long do_block(void)
 /* Voluntarily yield the processor for this allocation. */
 static long do_yield(void)
 {
-#ifdef ADV_SCHED_HISTO
-    adv_sched_hist_start(current->processor);
-#endif
-    
     TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
     __enter_scheduler();
     return 0;
@@ -422,13 +405,7 @@ static void __enter_scheduler(void)
     
     spin_lock_irq(&schedule_data[cpu].schedule_lock);
 
-#ifdef ADV_SCHED_HISTO
-    adv_sched_hist_from_stop(cpu);
-#endif
     now = NOW();
-#ifdef ADV_SCHED_HISTO
-    adv_sched_hist_start(cpu);
-#endif
 
     rem_ac_timer(&schedule_data[cpu].s_timer);
     
@@ -447,7 +424,7 @@ static void __enter_scheduler(void)
     next->lastschd = now;
 
     /* reprogramm the timer */
-    schedule_data[cpu].s_timer.expires  = now + r_time;
+    schedule_data[cpu].s_timer.expires = now + r_time;
     add_ac_timer(&schedule_data[cpu].s_timer);
 
     /* Must be protected by the schedule_lock! */
@@ -455,12 +432,9 @@ static void __enter_scheduler(void)
 
     spin_unlock_irq(&schedule_data[cpu].schedule_lock);
 
-    if ( unlikely(prev == next) ) {
-#ifdef ADV_SCHED_HISTO
-        adv_sched_hist_to_stop(cpu);
-#endif
+    if ( unlikely(prev == next) )
         return continue_running(prev);
-    }
+
     perfc_incrc(sched_ctx);
 
 #if defined(WAKE_HISTO)
@@ -495,10 +469,6 @@ static void __enter_scheduler(void)
              prev->domain->domain_id, prev->vcpu_id,
              next->domain->domain_id, next->vcpu_id);
 
-#ifdef ADV_SCHED_HISTO
-    adv_sched_hist_to_stop(cpu);
-#endif
-
     context_switch(prev, next);
 }
 
@@ -520,10 +490,6 @@ int idle_cpu(int cpu)
 /* The scheduler timer: force a run through the scheduler */
 static void s_timer_fn(unsigned long unused)
 {
-#ifdef ADV_SCHED_HISTO
-    adv_sched_hist_start(current->processor);
-#endif
-
     raise_softirq(SCHEDULE_SOFTIRQ);
     perfc_incrc(sched_irq);
 }
@@ -567,8 +533,7 @@ void __init scheduler_init(void)
     for ( i = 0; i < NR_CPUS; i++ )
     {
         spin_lock_init(&schedule_data[i].schedule_lock);
-        schedule_data[i].curr = &idle0_exec_domain;
-        
+
         init_ac_timer(&schedule_data[i].s_timer);
         schedule_data[i].s_timer.cpu      = i;
         schedule_data[i].s_timer.data     = 2;
@@ -580,7 +545,8 @@ void __init scheduler_init(void)
         t_timer[i].function = &t_timer_fn;
     }
 
-    schedule_data[0].idle = &idle0_exec_domain;
+    schedule_data[0].curr = idle_task[0];
+    schedule_data[0].idle = idle_task[0];
 
     for ( i = 0; schedulers[i] != NULL; i++ )
     {
@@ -594,8 +560,8 @@ void __init scheduler_init(void)
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
 
-    if ( SCHED_OP(init_scheduler) < 0 )
-        panic("Initialising scheduler failed!");
+    BUG_ON(SCHED_OP(alloc_task, idle_task[0]) < 0);
+    sched_add_domain(idle_task[0]);
 }
 
 /*
@@ -604,14 +570,10 @@ void __init scheduler_init(void)
  */
 void schedulers_start(void) 
 {   
-    s_timer_fn(0);
-    smp_call_function((void *)s_timer_fn, NULL, 1, 1);
-
     t_timer_fn(0);
     smp_call_function((void *)t_timer_fn, NULL, 1, 1);
 }
 
-
 void dump_runq(unsigned char key)
 {
     s_time_t      now = NOW();
@@ -624,7 +586,7 @@ void dump_runq(unsigned char key)
     SCHED_OP(dump_settings);
     printk("NOW=0x%08X%08X\n",  (u32)(now>>32), (u32)now); 
 
-    for ( i = 0; i < smp_num_cpus; i++ )
+    for_each_online_cpu ( i )
     {
         spin_lock(&schedule_data[i].schedule_lock);
         printk("CPU[%02d] ", i);
@@ -636,10 +598,11 @@ void dump_runq(unsigned char key)
 }
 
 #if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
+
 void print_sched_histo(unsigned char key)
 {
     int i, j, k;
-    for ( k = 0; k < smp_num_cpus; k++ )
+    for_each_online_cpu ( k )
     {
         j = 0;
         printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
@@ -659,73 +622,20 @@ void print_sched_histo(unsigned char key)
     }
       
 }
+
 void reset_sched_histo(unsigned char key)
 {
     int i, j;
-    for ( j = 0; j < smp_num_cpus; j++ )
+    for ( j = 0; j < NR_CPUS; j++ )
         for ( i=0; i < BUCKETS; i++ ) 
             schedule_data[j].hist[i] = 0;
 }
+
 #else
-#if defined(ADV_SCHED_HISTO)
-void print_sched_histo(unsigned char key)
-{
-    int i, j, k,t;
-    printf("Hello!\n");
-    for ( k = 0; k < smp_num_cpus; k++ )
-    {
-        j = 0;
-       t = 0;
-        printf ("CPU[%02d]: scheduler latency histogram FROM (ms:[count])\n", k);
-        for ( i = 0; i < BUCKETS; i++ )
-        {
-            //if ( schedule_data[k].hist[i] != 0 )
-            {
-               t += schedule_data[k].from_hist[i];
-                if ( i < BUCKETS-1 )
-                    printk("%3d:[%7u]    ", i, schedule_data[k].from_hist[i]);
-                else
-                    printk(" >:[%7u]    ", schedule_data[k].from_hist[i]);
-                //if ( !(++j % 5) )
-                    printk("\n");
-            }
-        }
-        printk("\nTotal: %i\n",t);
-    }
-    for ( k = 0; k < smp_num_cpus; k++ )
-    {
-        j = 0; t = 0;
-        printf ("CPU[%02d]: scheduler latency histogram TO (ms:[count])\n", k);
-        for ( i = 0; i < BUCKETS; i++ )
-        {
-            //if ( schedule_data[k].hist[i] != 0 )
-            {
-               t += schedule_data[k].from_hist[i];
-                if ( i < BUCKETS-1 )
-                    printk("%3d:[%7u]    ", i, schedule_data[k].to_hist[i]);
-                else
-                    printk(" >:[%7u]    ", schedule_data[k].to_hist[i]);
-                //if ( !(++j % 5) )
-                    printk("\n");
-            }
-        }
-       printk("\nTotal: %i\n",t);
-    }
-      
-}
-void reset_sched_histo(unsigned char key)
-{
-    int i, j;
-    for ( j = 0; j < smp_num_cpus; j++ ) {
-        for ( i=0; i < BUCKETS; i++ ) 
-            schedule_data[j].to_hist[i] = schedule_data[j].from_hist[i] = 0;
-        schedule_data[j].save_tsc = 0;
-    }
-}
-#else
+
 void print_sched_histo(unsigned char key) { }
 void reset_sched_histo(unsigned char key) { }
-#endif
+
 #endif
 
 /*
index 48da9a7eb738251637648b76f19624415ed99496..952a2f958358976bbdaeb62a6470e6f0348e037d 100644 (file)
@@ -66,7 +66,7 @@ void init_trace_bufs(void)
         return;
     }
 
-    nr_pages = smp_num_cpus * opt_tbuf_size;
+    nr_pages = num_online_cpus() * opt_tbuf_size;
     order    = get_order(nr_pages * PAGE_SIZE);
     
     if ( (rawbuf = (char *)alloc_xenheap_pages(order)) == NULL )
@@ -79,7 +79,7 @@ void init_trace_bufs(void)
     for ( i = 0; i < nr_pages; i++ )
         SHARE_PFN_WITH_DOMAIN(virt_to_page(rawbuf + i * PAGE_SIZE), dom0);
     
-    for ( i = 0; i < smp_num_cpus; i++ )
+    for_each_online_cpu ( i )
     {
         buf = t_bufs[i] = (struct t_buf *)&rawbuf[i*opt_tbuf_size*PAGE_SIZE];
         
index 0fb3e4472751343963cae24a8532462e8c91cce7..17777ad123a32bf3f581095af59ffd6d6f8dc7a0 100644 (file)
@@ -6,8 +6,10 @@
 #include <asm/asm-offsets.h>
 #include <asm/processor.h>
 
+#ifndef STR
 #define __STR(x) #x
 #define STR(x) __STR(x)
+#endif
 
 #ifdef __x86_64__
 #include <asm/x86_64/asm_defns.h>
index 2337197670642695d640e9338359e58af53ed480..500d02f38f1fcfe0cbf601f948f3b5e59fa68584 100644 (file)
@@ -7,6 +7,11 @@
 
 #include <xen/config.h>
 
+#ifndef STR
+#define __STR(x) #x
+#define STR(x) __STR(x)
+#endif
+
 /*
  * These have to be done with inline assembly: that way the bit-setting
  * is guaranteed to be atomic. All bit operations return 0 if the bit
@@ -246,29 +251,28 @@ static __inline__ int variable_test_bit(long nr, volatile void * addr)
 /**
  * find_first_zero_bit - find the first zero bit in a memory region
  * @addr: The address to start the search at
- * @size: The maximum bitnumber to search
+ * @size: The maximum size to search
  *
  * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit. -1 when none found.
+ * containing a bit.
  */
-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
+static inline long find_first_zero_bit(
+    const unsigned long *addr, unsigned size)
 {
-       int d0, d1, d2;
-       int res;
+       long d0, d1, d2;
+       long res;
 
-       if (!size)
-               return 0;
        __asm__ __volatile__(
-               "movl $-1,%%eax\n\t"
-               "xorl %%edx,%%edx\n\t"
-               "repe; scasl\n\t"
+               "mov $-1,%%"__OP"ax\n\t"
+               "xor %%edx,%%edx\n\t"
+               "repe; scas"__OS"\n\t"
                "je 1f\n\t"
-               "xorl -4(%%"__OP"di),%%eax\n\t"
-               "sub"__OS" $4,%%"__OP"di\n\t"
-               "bsfl %%eax,%%edx\n"
-               "1:\tsub"__OS" %%"__OP"bx,%%"__OP"di\n\t"
-               "shl"__OS" $3,%%"__OP"di\n\t"
-               "add"__OS" %%"__OP"di,%%"__OP"dx"
+               "lea -"STR(BITS_PER_LONG/8)"(%%"__OP"di),%%"__OP"di\n\t"
+               "xor (%%"__OP"di),%%"__OP"ax\n\t"
+               "bsf %%"__OP"ax,%%"__OP"dx\n"
+               "1:\tsub %%"__OP"bx,%%"__OP"di\n\t"
+               "shl $3,%%"__OP"di\n\t"
+               "add %%"__OP"di,%%"__OP"dx"
                :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
                :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
        return res;
@@ -280,66 +284,72 @@ static __inline__ int find_first_zero_bit(void * addr, unsigned size)
  * @offset: The bitnumber to start searching at
  * @size: The maximum size to search
  */
-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
-{
-       unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
-       int set = 0, bit = offset & 31, res;
-       
-       if (bit) {
-               /*
-                * Look for zero in first byte
-                */
-               __asm__("bsfl %1,%0\n\t"
-                       "jne 1f\n\t"
-                       "movl $32, %0\n"
-                       "1:"
-                       : "=r" (set)
-                       : "r" (~(*p >> bit)));
-               if (set < (32 - bit))
-                       return set + offset;
-               set = 32 - bit;
-               p++;
-       }
-       /*
-        * No zero yet, search remaining full bytes for a zero
-        */
-       res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr));
-       return (offset + set + res);
-}
+long find_next_zero_bit(const unsigned long *addr, int size, int offset);
 
 /**
- * ffz - find first zero in word.
- * @word: The word to search
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
  *
- * Undefined if no zero exists, so code should check against ~0UL first.
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
  */
-static __inline__ unsigned long ffz(unsigned long word)
+static inline long find_first_bit(
+    const unsigned long *addr, unsigned size)
 {
-       __asm__("bsf"__OS" %1,%0"
-               :"=r" (word)
-               :"r" (~word));
-       return word;
+       long d0, d1;
+       long res;
+
+       __asm__ __volatile__(
+               "xor %%eax,%%eax\n\t"
+               "repe; scas"__OS"\n\t"
+               "je 1f\n\t"
+               "lea -"STR(BITS_PER_LONG/8)"(%%"__OP"di),%%"__OP"di\n\t"
+               "bsf (%%"__OP"di),%%"__OP"ax\n"
+               "1:\tsub %%"__OP"bx,%%"__OP"di\n\t"
+               "shl $3,%%"__OP"di\n\t"
+               "add %%"__OP"di,%%"__OP"ax"
+               :"=a" (res), "=&c" (d0), "=&D" (d1)
+               :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
+       return res;
 }
 
 /**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
+ * find_next_bit - find the first set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
  */
-static __inline__ int ffs(int x)
-{
-       int r;
+long find_next_bit(const unsigned long *addr, int size, int offset);
 
-       __asm__("bsfl %1,%0\n\t"
-               "jnz 1f\n\t"
-               "movl $-1,%0\n"
-               "1:" : "=r" (r) : "g" (x));
-       return r+1;
+/* return index of first bit set in val or max when no bit is set */
+static inline unsigned long __scanbit(unsigned long val, unsigned long max)
+{
+       asm("bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max));
+       return val;
 }
 
+#define find_first_bit(addr,size) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+  (__scanbit(*(unsigned long *)addr,(size))) : \
+  find_first_bit(addr,size)))
+
+#define find_next_bit(addr,size,off) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?         \
+  ((off) + (__scanbit((*(unsigned long *)addr) >> (off),(size)-(off)))) : \
+  find_next_bit(addr,size,off)))
+
+#define find_first_zero_bit(addr,size) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+  (__scanbit(~*(unsigned long *)addr,(size))) : \
+  find_first_zero_bit(addr,size)))
+        
+#define find_next_zero_bit(addr,size,off) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?         \
+  ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \
+  find_next_zero_bit(addr,size,off)))
+
+
 /*
  * These are the preferred 'find first' functions in Xen.
  * Both return the appropriate bit index, with the l.s.b. having index 0.
index ef915df700e4a90b13e3d3af0b143c206d5435b2..28ed8b296afc137af6b3043179fc97eb230a2f54 100644 (file)
@@ -1,17 +1,48 @@
 #ifndef __I386_DIV64
 #define __I386_DIV64
 
+/*
+ * do_div() is NOT a C function. It wants to return
+ * two values (the quotient and the remainder), but
+ * since that doesn't work very well in C, what it
+ * does is:
+ *
+ * - modifies the 64-bit dividend _in_place_
+ * - returns the 32-bit remainder
+ *
+ * This ends up being the most efficient "calling
+ * convention" on x86.
+ */
 #define do_div(n,base) ({ \
-       unsigned long __upper, __low, __high, __mod; \
+       unsigned long __upper, __low, __high, __mod, __base; \
+       __base = (base); \
        asm("":"=a" (__low), "=d" (__high):"A" (n)); \
        __upper = __high; \
        if (__high) { \
-               __upper = __high % (base); \
-               __high = __high / (base); \
+               __upper = __high % (__base); \
+               __high = __high / (__base); \
        } \
-       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \
+       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
        asm("":"=A" (n):"a" (__low),"d" (__high)); \
        __mod; \
 })
 
+/*
+ * (long)X = ((long long)divs) / (long)div
+ * (long)rem = ((long long)divs) % (long)div
+ *
+ * Warning: this raises a divide-error exception if the quotient overflows.
+ */
+#define div_long_long_rem(a,b,c) div_ll_X_l_rem(a,b,c)
+
+extern inline long
+div_ll_X_l_rem(long long divs, long div, long *rem)
+{
+       long dum2;
+      __asm__("divl %2":"=a"(dum2), "=d"(*rem)
+      :        "rm"(div), "A"(divs));
+
+       return dum2;
+
+}
 #endif
index 8f48465cb211c8c7286686ded9f7024afb83bd92..810bf345b754c69db084330d4c16d898ad00ebfe 100644 (file)
@@ -93,7 +93,7 @@ extern void write_cr3(unsigned long cr3);
 #define local_flush_tlb_one(__addr) \
     __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
 
-#define flush_tlb_all()     flush_tlb_mask((1 << smp_num_cpus) - 1)
+#define flush_tlb_all()     flush_tlb_mask((1 << num_online_cpus()) - 1)
 
 #ifndef CONFIG_SMP
 #define flush_tlb_all_pge()          local_flush_tlb_pge()
index 6036e849c52334dc891e36586646ca0c514e61e0..97f143ad44a053c4d52e56d4dec1689d74030a12 100644 (file)
@@ -21,38 +21,31 @@ extern void (*interrupt[NR_IRQS])(void);
 
 #define platform_legacy_irq(irq)       ((irq) < 16)
 
-extern void mask_irq(unsigned int irq);
-extern void unmask_irq(unsigned int irq);
-extern void disable_8259A_irq(unsigned int irq);
-extern void enable_8259A_irq(unsigned int irq);
-extern int i8259A_irq_pending(unsigned int irq);
-extern void make_8259A_irq(unsigned int irq);
-extern void init_8259A(int aeoi);
-extern void send_IPI_self(int vector);
-extern void init_VISWS_APIC_irqs(void);
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-extern void print_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-extern void send_IPI(int dest, int vector);
+void disable_8259A_irq(unsigned int irq);
+void enable_8259A_irq(unsigned int irq);
+int i8259A_irq_pending(unsigned int irq);
+void make_8259A_irq(unsigned int irq);
+void init_8259A(int aeoi);
+void send_IPI_self(int vector);
+void init_VISWS_APIC_irqs(void);
+void setup_IO_APIC(void);
+void disable_IO_APIC(void);
+void print_IO_APIC(void);
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+void send_IPI(int dest, int vector);
+void setup_ioapic_dest(void);
 
 extern unsigned long io_apic_irqs;
 
 extern atomic_t irq_err_count;
 extern atomic_t irq_mis_count;
 
-extern char _stext, _etext;
-
 #define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
 
-#include <xen/irq.h>
-
 static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
 {
-#if defined(CONFIG_X86_IO_APIC)
     if (IO_APIC_IRQ(i))
         send_IPI_self(IO_APIC_VECTOR(i));
-#endif
 }
 
 #endif /* _ASM_HW_IRQ_H */
index 907c820c12d7c2172d360842a14b18beefdc0b07..94b82d4ba32c8fb63f60a34ae85fcb6d6ba10528 100644 (file)
@@ -179,6 +179,7 @@ extern struct cpuinfo_x86 cpu_data[];
 #define current_cpu_data boot_cpu_data
 #endif
 
+extern  int phys_proc_id[NR_CPUS];
 extern char ignore_irq13;
 
 extern void identify_cpu(struct cpuinfo_x86 *);
index c91a10aef0c36fab6b3d51b8dc829381163f1263..3703384c3ddaf6464c5e561532510f7cce7c5f69 100644 (file)
@@ -6,6 +6,7 @@
 #include <xen/config.h>
 #include <xen/lib.h>
 #include <xen/types.h>
+#include <xen/bitops.h>
 
 /*
  * bitmaps provide bit arrays that consume one or more unsigned
index 48814478429594ab2d447bfc14c114c856e04d51..f4ec7a24369574cb805294379f08f167ca587c06 100644 (file)
+#ifndef __XEN_CPUMASK_H
+#define __XEN_CPUMASK_H
+
 /*
- * XXX This to be replaced with the Linux file in the near future.
+ * Cpumasks provide a bitmap suitable for representing the
+ * set of CPU's in a system, one bit position per CPU number.
+ *
+ * See detailed comments in the file xen/bitmap.h describing the
+ * data type on which these cpumasks are based.
+ *
+ * For details of cpumask_scnprintf() and cpumask_parse(),
+ * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ *
+ * The available cpumask operations are:
+ *
+ * void cpu_set(cpu, mask)             turn on bit 'cpu' in mask
+ * void cpu_clear(cpu, mask)           turn off bit 'cpu' in mask
+ * void cpus_setall(mask)              set all bits
+ * void cpus_clear(mask)               clear all bits
+ * int cpu_isset(cpu, mask)            true iff bit 'cpu' set in mask
+ * int cpu_test_and_set(cpu, mask)     test and set bit 'cpu' in mask
+ *
+ * void cpus_and(dst, src1, src2)      dst = src1 & src2  [intersection]
+ * void cpus_or(dst, src1, src2)       dst = src1 | src2  [union]
+ * void cpus_xor(dst, src1, src2)      dst = src1 ^ src2
+ * void cpus_andnot(dst, src1, src2)   dst = src1 & ~src2
+ * void cpus_complement(dst, src)      dst = ~src
+ *
+ * int cpus_equal(mask1, mask2)                Does mask1 == mask2?
+ * int cpus_intersects(mask1, mask2)   Do mask1 and mask2 intersect?
+ * int cpus_subset(mask1, mask2)       Is mask1 a subset of mask2?
+ * int cpus_empty(mask)                        Is mask empty (no bits sets)?
+ * int cpus_full(mask)                 Is mask full (all bits sets)?
+ * int cpus_weight(mask)               Hamming weigh - number of set bits
+ *
+ * void cpus_shift_right(dst, src, n)  Shift right
+ * void cpus_shift_left(dst, src, n)   Shift left
+ *
+ * int first_cpu(mask)                 Number lowest set bit, or NR_CPUS
+ * int next_cpu(cpu, mask)             Next cpu past 'cpu', or NR_CPUS
+ *
+ * cpumask_t cpumask_of_cpu(cpu)       Return cpumask with bit 'cpu' set
+ * CPU_MASK_ALL                                Initializer - all bits set
+ * CPU_MASK_NONE                       Initializer - no bits set
+ * unsigned long *cpus_addr(mask)      Array of unsigned long's in mask
+ *
+ * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
+ * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask
+ *
+ * for_each_cpu_mask(cpu, mask)                for-loop cpu over mask
+ *
+ * int num_online_cpus()               Number of online CPUs
+ * int num_possible_cpus()             Number of all possible CPUs
+ * int num_present_cpus()              Number of present CPUs
+ *
+ * int cpu_online(cpu)                 Is some cpu online?
+ * int cpu_possible(cpu)               Is some cpu possible?
+ * int cpu_present(cpu)                        Is some cpu present (can schedule)?
+ *
+ * int any_online_cpu(mask)            First online cpu in mask
+ *
+ * for_each_cpu(cpu)                   for-loop cpu over cpu_possible_map
+ * for_each_online_cpu(cpu)            for-loop cpu over cpu_online_map
+ * for_each_present_cpu(cpu)           for-loop cpu over cpu_present_map
+ *
+ * Subtlety:
+ * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway)
+ *    to generate slightly worse code.  Note for example the additional
+ *    40 lines of assembly code compiling the "for each possible cpu"
+ *    loops buried in the disk_stat_read() macros calls when compiling
+ *    drivers/block/genhd.c (arch i386, CONFIG_SMP=y).  So use a simple
+ *    one-line #define for cpu_isset(), instead of wrapping an inline
+ *    inside a macro, the way we do the other calls.
  */
 
-#ifndef __XEN_CPUMASK_H__
-#define __XEN_CPUMASK_H__
-
+#include <xen/config.h>
 #include <xen/bitmap.h>
+#include <xen/kernel.h>
 
-typedef u32 cpumask_t;
+typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
+extern cpumask_t _unused_cpumask_arg_;
+
+#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
+static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
+{
+       set_bit(cpu, dstp->bits);
+}
+
+#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
+static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
+{
+       clear_bit(cpu, dstp->bits);
+}
+
+#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
+static inline void __cpus_setall(cpumask_t *dstp, int nbits)
+{
+       bitmap_fill(dstp->bits, nbits);
+}
+
+#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
+static inline void __cpus_clear(cpumask_t *dstp, int nbits)
+{
+       bitmap_zero(dstp->bits, nbits);
+}
+
+/* No static inline type checking - see Subtlety (1) above. */
+#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
+
+#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
+static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
+{
+       return test_and_set_bit(cpu, addr->bits);
+}
+
+#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_andnot(dst, src1, src2) \
+                               __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS)
+static inline void __cpus_complement(cpumask_t *dstp,
+                                       const cpumask_t *srcp, int nbits)
+{
+       bitmap_complement(dstp->bits, srcp->bits, nbits);
+}
+
+#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_equal(const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       return bitmap_equal(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_intersects(const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_subset(const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       return bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
+static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+{
+       return bitmap_empty(srcp->bits, nbits);
+}
+
+#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS)
+static inline int __cpus_full(const cpumask_t *srcp, int nbits)
+{
+       return bitmap_full(srcp->bits, nbits);
+}
+
+#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
+static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+{
+       return bitmap_weight(srcp->bits, nbits);
+}
+
+#define cpus_shift_right(dst, src, n) \
+                       __cpus_shift_right(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_right(cpumask_t *dstp,
+                                       const cpumask_t *srcp, int n, int nbits)
+{
+       bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define cpus_shift_left(dst, src, n) \
+                       __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_left(cpumask_t *dstp,
+                                       const cpumask_t *srcp, int n, int nbits)
+{
+       bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
+static inline int __first_cpu(const cpumask_t *srcp, int nbits)
+{
+       return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
+}
+
+#define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS)
+static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
+{
+       return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
+}
+
+#define cpumask_of_cpu(cpu)                                            \
+({                                                                     \
+       typeof(_unused_cpumask_arg_) m;                                 \
+       if (sizeof(m) == sizeof(unsigned long)) {                       \
+               m.bits[0] = 1UL<<(cpu);                                 \
+       } else {                                                        \
+               cpus_clear(m);                                          \
+               cpu_set((cpu), m);                                      \
+       }                                                               \
+       m;                                                              \
+})
+
+#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
+
+#if NR_CPUS <= BITS_PER_LONG
+
+#define CPU_MASK_ALL                                                   \
+(cpumask_t) { {                                                                \
+       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD                 \
+} }
+
+#else
+
+#define CPU_MASK_ALL                                                   \
+(cpumask_t) { {                                                                \
+       [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,                        \
+       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD                 \
+} }
 
-#ifndef cpu_online_map
-extern cpumask_t cpu_online_map;
 #endif
 
-static inline int cpus_weight(cpumask_t w)
+#define CPU_MASK_NONE                                                  \
+(cpumask_t) { {                                                                \
+       [0 ... BITS_TO_LONGS(NR_CPUS)-1] =  0UL                         \
+} }
+
+#define CPU_MASK_CPU0                                                  \
+(cpumask_t) { {                                                                \
+       [0] =  1UL                                                      \
+} }
+
+#define cpus_addr(src) ((src).bits)
+
+/*
+#define cpumask_scnprintf(buf, len, src) \
+                       __cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
+static inline int __cpumask_scnprintf(char *buf, int len,
+                                       const cpumask_t *srcp, int nbits)
 {
-    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
-    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
-    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
-    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
-    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+       return bitmap_scnprintf(buf, len, srcp->bits, nbits);
 }
 
-#define cpus_addr(_m) (&(_m))
+#define cpumask_parse(ubuf, ulen, src) \
+                       __cpumask_parse((ubuf), (ulen), &(src), NR_CPUS)
+static inline int __cpumask_parse(const char __user *buf, int len,
+                                       cpumask_t *dstp, int nbits)
+{
+       return bitmap_parse(buf, len, dstp->bits, nbits);
+}
+*/
+
+#if NR_CPUS > 1
+#define for_each_cpu_mask(cpu, mask)           \
+       for ((cpu) = first_cpu(mask);           \
+               (cpu) < NR_CPUS;                \
+               (cpu) = next_cpu((cpu), (mask)))
+#else /* NR_CPUS == 1 */
+#define for_each_cpu_mask(cpu, mask) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+#endif /* NR_CPUS */
+
+/*
+ * The following particular system cpumasks and operations manage
+ * possible, present and online cpus.  Each of them is a fixed size
+ * bitmap of size NR_CPUS.
+ *
+ *  #ifdef CONFIG_HOTPLUG_CPU
+ *     cpu_possible_map - all NR_CPUS bits set
+ *     cpu_present_map  - has bit 'cpu' set iff cpu is populated
+ *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
+ *  #else
+ *     cpu_possible_map - has bit 'cpu' set iff cpu is populated
+ *     cpu_present_map  - copy of cpu_possible_map
+ *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
+ *  #endif
+ *
+ *  In either case, NR_CPUS is fixed at compile time, as the static
+ *  size of these bitmaps.  The cpu_possible_map is fixed at boot
+ *  time, as the set of CPU id's that it is possible might ever
+ *  be plugged in at anytime during the life of that system boot.
+ *  The cpu_present_map is dynamic(*), representing which CPUs
+ *  are currently plugged in.  And cpu_online_map is the dynamic
+ *  subset of cpu_present_map, indicating those CPUs available
+ *  for scheduling.
+ *
+ *  If HOTPLUG is enabled, then cpu_possible_map is forced to have
+ *  all NR_CPUS bits set, otherwise it is just the set of CPUs that
+ *  ACPI reports present at boot.
+ *
+ *  If HOTPLUG is enabled, then cpu_present_map varies dynamically,
+ *  depending on what ACPI reports as currently plugged in, otherwise
+ *  cpu_present_map is just a copy of cpu_possible_map.
+ *
+ *  (*) Well, cpu_present_map is dynamic in the hotplug case.  If not
+ *      hotplug, it's a copy of cpu_possible_map, hence fixed at boot.
+ *
+ * Subtleties:
+ * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
+ *    assumption that their single CPU is online.  The UP
+ *    cpu_{online,possible,present}_maps are placebos.  Changing them
+ *    will have no useful affect on the following num_*_cpus()
+ *    and cpu_*() macros in the UP case.  This ugliness is a UP
+ *    optimization - don't waste any instructions or memory references
+ *    asking if you're online or how many CPUs there are if there is
+ *    only one CPU.
+ * 2) Most SMP arch's #define some of these maps to be some
+ *    other map specific to that arch.  Therefore, the following
+ *    must be #define macros, not inlines.  To see why, examine
+ *    the assembly code produced by the following.  Note that
+ *    set1() writes phys_x_map, but set2() writes x_map:
+ *        int x_map, phys_x_map;
+ *        #define set1(a) x_map = a
+ *        inline void set2(int a) { x_map = a; }
+ *        #define x_map phys_x_map
+ *        main(){ set1(3); set2(5); }
+ */
+
+extern cpumask_t cpu_possible_map;
+extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_present_map;
+
+#if NR_CPUS > 1
+#define num_online_cpus()      cpus_weight(cpu_online_map)
+#define num_possible_cpus()    cpus_weight(cpu_possible_map)
+#define num_present_cpus()     cpus_weight(cpu_present_map)
+#define cpu_online(cpu)                cpu_isset((cpu), cpu_online_map)
+#define cpu_possible(cpu)      cpu_isset((cpu), cpu_possible_map)
+#define cpu_present(cpu)       cpu_isset((cpu), cpu_present_map)
+#else
+#define num_online_cpus()      1
+#define num_possible_cpus()    1
+#define num_present_cpus()     1
+#define cpu_online(cpu)                ((cpu) == 0)
+#define cpu_possible(cpu)      ((cpu) == 0)
+#define cpu_present(cpu)       ((cpu) == 0)
+#endif
+
+#define any_online_cpu(mask)                   \
+({                                             \
+       int cpu;                                \
+       for_each_cpu_mask(cpu, (mask))          \
+               if (cpu_online(cpu))            \
+                       break;                  \
+       cpu;                                    \
+})
+
+#define for_each_cpu(cpu)        for_each_cpu_mask((cpu), cpu_possible_map)
+#define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)
+#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
 
-#endif /* __XEN_CPUMASK_H__ */
+#endif /* __XEN_CPUMASK_H */
index 993a6c19cf1065b93b11d7c43ac823517208368e..53a725183830f99cbf893f903836cd895658566b 100644 (file)
 #define max_t(type,x,y) \
         ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
 
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ *
+ * @ptr:       the pointer to the member.
+ * @type:      the type of the container struct this is embedded in.
+ * @member:    the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({                     \
+        const typeof( ((type *)0)->member ) *__mptr = (ptr);   \
+        (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({     type __dummy; \
+       typeof(x) __dummy2; \
+       (void)(&__dummy == &__dummy2); \
+       1; \
+})
+
+
 #endif /* _LINUX_KERNEL_H */
 
index 7cd5295feaa5e2356aa23d6828ce8387fd0504c0..47fb6452f0428cd12f494904945c7a3918a00407 100644 (file)
@@ -8,7 +8,6 @@
 #ifndef __XEN_SCHED_IF_H__
 #define __XEN_SCHED_IF_H__
 
-//#define ADV_SCHED_HISTO
 #define BUCKETS  10
 /*300*/
 
@@ -19,11 +18,6 @@ struct schedule_data {
     void               *sched_priv;
     struct ac_timer     s_timer;        /* scheduling timer                */
     unsigned long       tick;           /* current periodic 'tick'         */
-#ifdef ADV_SCHED_HISTO
-    u32                        to_hist[BUCKETS];
-    u32                        from_hist[BUCKETS];
-    u64                        save_tsc;
-#endif
 #ifdef BUCKETS
     u32                 hist[BUCKETS];  /* for scheduler latency histogram */
 #endif
@@ -39,8 +33,6 @@ struct scheduler {
     char *opt_name;         /* option name for this scheduler    */
     unsigned int sched_id;  /* ID for this scheduler             */
 
-    int          (*init_scheduler) (void);
-    int          (*init_idle_task) (struct exec_domain *);
     int          (*alloc_task)     (struct exec_domain *);
     void         (*add_task)       (struct exec_domain *);
     void         (*free_task)      (struct domain *);
index 21e4a95c387932b4133208a92393f7015da6832c..0bfc2345b4261cbcc1c52953d8e00fdd1c2533be 100644 (file)
@@ -246,9 +246,6 @@ void new_thread(struct exec_domain *d,
                 unsigned long start_stack,
                 unsigned long start_info);
 
-extern unsigned long wait_init_idle;
-#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
-
 #define set_current_state(_s) do { current->state = (_s); } while (0)
 void scheduler_init(void);
 void schedulers_start(void);
@@ -257,7 +254,6 @@ void sched_rem_domain(struct exec_domain *);
 long sched_ctl(struct sched_ctl_cmd *);
 long sched_adjdom(struct sched_adjdom_cmd *);
 int  sched_id();
-void init_idle_task(void);
 void domain_wake(struct exec_domain *d);
 void domain_sleep(struct exec_domain *d);
 
index 811e25ac24334bbc74f302c161ef0039b9f049c9..227830852527d1858d5b52252704db1174428ada 100644 (file)
@@ -26,19 +26,19 @@ extern void smp_send_event_check_mask(unsigned long cpu_mask);
 #define smp_send_event_check_cpu(_cpu) smp_send_event_check_mask(1<<(_cpu))
 
 /*
- * Boot processor call to load the other CPU's
+ * Prepare machine for booting other CPUs.
  */
-extern void smp_boot_cpus(void);
+extern void smp_prepare_cpus(unsigned int max_cpus);
 
 /*
- * Processor call in. Must hold processors until ..
+ * Bring a CPU up
  */
-extern void smp_callin(void);
+extern int __cpu_up(unsigned int cpunum);
 
 /*
- * Multiprocessors may now schedule
+ * Final polishing of CPUs
  */
-extern void smp_commence(void);
+extern void smp_cpus_done(unsigned int max_cpus);
 
 /*
  * Call a function on all other processors
@@ -57,12 +57,6 @@ static inline int on_each_cpu(void (*func) (void *info), void *info,
     return ret;
 }
 
-/*
- * True once the per process idle is forked
- */
-extern int smp_threads_ready;
-
-extern int smp_num_cpus;
 extern int ht_per_core;
 extern int opt_noht;
 
@@ -80,6 +74,12 @@ extern volatile int smp_msg_id;
 #define MSG_RESCHEDULE         0x0003  /* Reschedule request from master CPU*/
 #define MSG_CALL_FUNCTION       0x0004  /* Call function on all other CPUs */
 
+/*
+ * Mark the boot cpu "online" so that it can call console drivers in
+ * printk() and can access its per-cpu storage.
+ */
+void smp_prepare_boot_cpu(void);
+
 #else
 
 /*
@@ -88,16 +88,14 @@ extern volatile int smp_msg_id;
 
 #define smp_send_event_check_mask(_m)           ((void)0)
 #define smp_send_event_check_cpu(_p)            ((void)0) 
-#define smp_num_cpus                           1
+#ifndef __smp_processor_id
 #define smp_processor_id()                     0
+#endif
 #define hard_smp_processor_id()                        0
-#define smp_threads_ready                      1
-#define kernel_lock()
-#define cpu_logical_map(cpu)                   0
-#define cpu_number_map(cpu)                    0
 #define smp_call_function(func,info,retry,wait)        0
 #define on_each_cpu(func,info,retry,wait)      ({ func(info); 0; })
-#define cpu_online_map                         1
+#define num_booting_cpus()                     1
+#define smp_prepare_boot_cpu()                 do {} while (0)
 
 #endif